-
Notifications
You must be signed in to change notification settings - Fork 56
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add first iteration nao model * add uv venv * record videos * fix dependencies * change solver, add more options to naoxml * fix dependencies (really) * add nao_standup env * add test script * fix dependencies (really2) * improve nao env * use nao in scripts * use face-down keyframe for standup * improve naming * tune motors * add interactive script, fix joint ranges * limit arms * add tensorboard, better video naming * improve feet, improve body * add wandb * add site head_center and use in reward * add power-unlimited nao * use bhuman model, update env Co-authored-by: Arne Hasselbring <[email protected]> * add face-down keyframe * add site head_center * fix reward xpos * fix reward xpos * use named access for site head_center * rename env * remove wrong comments * update readme * remove redundant model * Add interactive_viewer Co-authored-by: oleflb <[email protected]> Co-authored-by: chatgpt <chatgpt> * Use external model Co-authored-by: oleflb <[email protected]> * update readme, add gitignore for video and run files * remove jupyter notebook * change neovim to dev-dependency * add entry_point of nao_env in test_script * Add glfw dependency to readme Co-authored-by: Alexander Schmidt <[email protected]> * remove test_script * remove ruff stuff and sort imports --------- Co-authored-by: Maximilian Schmidt <[email protected]> Co-authored-by: Arne Hasselbring <[email protected]> Co-authored-by: oleflb <[email protected]> Co-authored-by: Alexander Schmidt <[email protected]>
- Loading branch information
1 parent
1559bc1
commit 6784696
Showing
9 changed files
with
1,265 additions
and
804 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
videos/ | ||
runs/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Setup | ||
|
||
Make sure `glfw` is installed on your machine. | ||
|
||
For Python dependency management, use [uv](https://docs.astral.sh/uv/). | ||
After installing uv, run `uv sync` to install all python dependencies or directly execute an example from below. | ||
|
||
## Example usage | ||
|
||
To view the model: | ||
|
||
- `uv run interactive_viewer.py` | ||
|
||
To train the standup task: | ||
|
||
- `uv run standup.py` | ||
|
||
## To build a custom NAO environment | ||
|
||
Add a new `MujocoEnv` subclass in the `nao_env` folder and import it in that folder's `__init__.py` file. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import os | ||
|
||
import mujoco | ||
from mujoco import viewer | ||
|
||
# Request the EGL backend through the MUJOCO_GL environment variable. | ||
# NOTE(review): this is assigned after `import mujoco`; confirm the setting still takes effect here. | ||
os.environ["MUJOCO_GL"] = "egl" | ||
|
||
# Load the scene from a path relative to the current working directory and create its simulation data. | ||
model = mujoco.MjModel.from_xml_path("model/scene.xml") | ||
data = mujoco.MjData(model) | ||
|
||
# Disabled: would reset the simulation data to keyframe index 2 before launching. | ||
# mujoco.mj_resetDataKeyframe(model, data, 2) | ||
|
||
# Open the interactive viewer on the loaded model and data. | ||
viewer.launch(model, data) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv # noqa: F401 | ||
|
||
# ^^^^^ so that user gets the correct error | ||
# message if mujoco is not installed correctly | ||
|
||
from nao_env.nao_standup import NaoStandup |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
from pathlib import Path | ||
|
||
import numpy as np | ||
from gymnasium import utils | ||
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv | ||
from gymnasium.spaces import Box | ||
|
||
# Camera settings passed to MujocoEnv.__init__ as `default_camera_config` by NaoStandup. | ||
DEFAULT_CAMERA_CONFIG = { | ||
"trackbodyid": 1, | ||
"distance": 4.0, | ||
"lookat": np.array((0.0, 0.0, 0.8925)), | ||
"elevation": -20.0, | ||
} | ||
|
||
|
||
class NaoStandup(MujocoEnv, utils.EzPickle):
    """MuJoCo stand-up task for the NAO model.

    Every reset places the robot in a face-down keyframe pose with uniform
    noise added. The per-step reward grows with the height of the
    ``head_center`` site and is reduced by a quadratic control penalty and a
    capped penalty on external contact forces.
    """

    metadata = {
        "render_modes": ["human", "rgb_array", "depth_array"],
    }

    def __init__(self, **kwargs) -> None:
        """Load ``model/scene.xml`` from the current working directory.

        Args:
            **kwargs: Forwarded unchanged to ``MujocoEnv.__init__`` and to
                ``utils.EzPickle.__init__``.
        """
        obs_space = Box(low=-np.inf, high=np.inf, shape=(661,), dtype=np.float64)
        scene_path = Path.cwd() / "model" / "scene.xml"
        frame_skip = 5

        MujocoEnv.__init__(
            self,
            str(scene_path),
            frame_skip,
            observation_space=obs_space,
            default_camera_config=DEFAULT_CAMERA_CONFIG,
            **kwargs,
        )
        utils.EzPickle.__init__(self, **kwargs)

    def _get_obs(self) -> np.ndarray:
        """Return the flat observation vector built from the simulation data.

        The first two entries of ``qpos`` are left out; every other listed
        array is included in full.
        """
        sim = self.data
        parts = (
            sim.qpos.flat[2:],
            sim.qvel.flat,
            sim.cinert.flat,
            sim.cvel.flat,
            sim.qfrc_actuator.flat,
            sim.cfrc_ext.flat,
        )
        return np.concatenate(parts)

    def step(self, a):
        """Apply the action for ``frame_skip`` simulation steps and score it.

        Args:
            a: Control vector handed to ``do_simulation``.

        Returns:
            A tuple ``(observation, reward, terminated, truncated, info)``.
            Both flags are always ``False`` here; ``info`` holds the
            individual reward terms.
        """
        self.do_simulation(a, self.frame_skip)
        sim = self.data

        # Upward term: z coordinate of the head_center site divided by the
        # simulation timestep.
        head_site = self.model.site("head_center").id
        head_height = sim.site_xpos[head_site][2]
        upward_term = head_height / self.model.opt.timestep

        ctrl_penalty = 0.1 * np.square(sim.ctrl).sum()
        # The impact penalty is capped at 10.
        impact_penalty = min(0.5e-6 * np.square(sim.cfrc_ext).sum(), 10)
        reward = upward_term - ctrl_penalty - impact_penalty + 1

        if self.render_mode == "human":
            self.render()

        info = {
            "reward_linup": upward_term,
            "reward_quadctrl": -ctrl_penalty,
            "reward_impact": -impact_penalty,
        }
        return self._get_obs(), reward, False, False, info

    def reset_model(self):
        """Reset to the noisy face-down keyframe and return the observation.

        Uniform noise in ``[-0.03, 0.03]`` is added to every keyframe
        position and to every entry of ``init_qvel``.
        """
        noise = 0.03
        keyframe_qpos = [
            0.452845,
            0.219837,
            0.0556939,
            0.710551,
            -0.0810676,
            0.693965,
            0.0834173,
            -0.000571484,
            0.0239414,
            0.000401842,
            -3.89047e-05,
            -0.00175077,
            0.357233,
            0.0114063,
            0.000212495,
            0.000422366,
            3.92127e-05,
            -0.00133669,
            0.356939,
            0.0112884,
            -0.000206283,
            1.46985,
            0.110264,
            0.000766453,
            -0.034298,
            3.65047e-05,
            1.47067,
            -0.110094,
            -0.00201064,
            0.0342998,
            -0.00126886,
        ]
        # Position noise is drawn before velocity noise, so the random
        # stream is consumed in a fixed order.
        qpos = keyframe_qpos + self.np_random.uniform(
            low=-noise, high=noise, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=-noise, high=noise, size=self.model.nv
        )
        self.set_state(qpos, qvel)
        return self._get_obs()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,93 @@ | ||
import gymnasium as gym | ||
import os | ||
|
||
import gymnasium as gym | ||
import torch | ||
import wandb | ||
from stable_baselines3 import PPO | ||
from stable_baselines3.common.monitor import Monitor | ||
from stable_baselines3.common.utils import get_device | ||
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder | ||
from wandb.integration.sb3 import WandbCallback | ||
|
||
# When stable-baselines3 selects a non-CPU device, make sure the NVIDIA EGL vendor ICD file exists. | ||
if get_device() != torch.device("cpu"): | ||
NVIDIA_ICD_CONFIG_PATH = "/usr/share/glvnd/egl_vendor.d/10_nvidia.json" | ||
# Only create the file when it is missing; an existing file is left untouched. | ||
# NOTE(review): writing under /usr/share presumably needs elevated privileges and an existing directory - confirm. | ||
if not os.path.exists(NVIDIA_ICD_CONFIG_PATH): | ||
with open(NVIDIA_ICD_CONFIG_PATH, "w") as f: | ||
_ = f.write("""{ | ||
"file_format_version" : "1.0.0", | ||
"ICD" : { | ||
"library_path" : "libEGL_nvidia.so.0" | ||
} | ||
}""") | ||
|
||
# Configure MuJoCo to use the EGL rendering backend (requires GPU) | ||
os.environ["MUJOCO_GL"] = "egl" | ||
|
||
|
||
# taken from https://gymnasium.farama.org/main/_modules/gymnasium/wrappers/record_video/ | ||
def capped_cubic_video_schedule(episode_id: int) -> bool:
    """Decide whether a recording should start at the given index.

    Below 10000 the trigger fires on perfect cubes (0, 1, 8, 27, ..., 9261);
    from 10000 onwards it fires on every multiple of 10000
    (10000, 20000, 30000, ...).

    NOTE(review): this script passes the function to ``VecVideoRecorder`` as
    ``record_video_trigger``; that callback presumably receives the current
    step count rather than an episode number - confirm against the SB3 docs.

    Args:
        episode_id: Non-negative counter value to test.

    Returns:
        ``True`` if a recording should be triggered, ``False`` otherwise.
    """
    if episode_id >= 10000:
        return episode_id % 10000 == 0
    # Round the floating-point cube root so exact cubes survive the
    # round-trip (e.g. 1000 ** (1/3) evaluates to 9.999...).
    return int(round(episode_id ** (1.0 / 3))) ** 3 == episode_id
|
||
|
||
# Register the NAO stand-up environment under the id "NaoStandup-v1" with max_episode_steps=2500. | ||
gym.register( | ||
id="NaoStandup-v1", | ||
entry_point="nao_env:NaoStandup", | ||
max_episode_steps=2500, | ||
) | ||
|
||
# Run settings read by the training code in this script; also passed to wandb.init as the run config. | ||
config = { | ||
"policy_type": "MlpPolicy", | ||
"total_timesteps": 1000000, | ||
"env_name": "NaoStandup-v1", | ||
"render_mode": "rgb_array", | ||
} | ||
|
||
|
||
env = gym.make("CartPole-v1", render_mode="human") | ||
# Create the wandb run (mode="disabled" is passed); run.id names the video, tensorboard and model folders used later in this script. | ||
run = wandb.init( | ||
project="nao_standup", | ||
config=config, | ||
sync_tensorboard=True, | ||
monitor_gym=True, | ||
save_code=False, | ||
mode="disabled", | ||
) | ||
|
||
model = PPO("MlpPolicy", env, verbose=1) | ||
model.learn(total_timesteps=10_000) | ||
|
||
vec_env = model.get_env() | ||
if vec_env is None: | ||
raise ValueError("Model does not have a VecEnv") | ||
def make_env():
    """Create the configured environment wrapped in a ``Monitor``.

    Returns:
        The environment named by ``config["env_name"]``, created with
        ``config["render_mode"]`` and wrapped in ``Monitor``.
    """
    base_env = gym.make(config["env_name"], render_mode=config["render_mode"])
    # Monitor records stats such as returns.
    return Monitor(base_env)
|
||
obs = vec_env.reset() | ||
for i in range(1000): | ||
action, _states = model.predict(obs, deterministic=True) | ||
obs, reward, done, info = vec_env.step(action) | ||
vec_env.render() | ||
# VecEnv resets automatically | ||
# if done: | ||
# obs = env.reset() | ||
|
||
env.close() | ||
# Wrap the single environment factory in a vectorized env. | ||
env = DummyVecEnv([make_env]) | ||
# Record videos into videos/<run id>, started by capped_cubic_video_schedule, with video_length=200. | ||
env = VecVideoRecorder( | ||
env, | ||
f"videos/{run.id}", | ||
record_video_trigger=capped_cubic_video_schedule, | ||
video_length=200, | ||
) | ||
# Build the PPO model from the configured policy type; tensorboard logs go to runs/<run id>. | ||
model = PPO( | ||
config["policy_type"], env, verbose=1, tensorboard_log=f"runs/{run.id}" | ||
) | ||
# Train for the configured number of timesteps; WandbCallback is given models/<run id> as model_save_path. | ||
model.learn( | ||
total_timesteps=config["total_timesteps"], | ||
callback=WandbCallback( | ||
gradient_save_freq=100, | ||
model_save_path=f"models/{run.id}", | ||
verbose=2, | ||
), | ||
) | ||
# Close the wandb run once training has returned. | ||
run.finish() |
Oops, something went wrong.