Skip to content

Commit

Permalink
Mujoco Env (#1511)
Browse files Browse the repository at this point in the history
* add first iteration nao model

* add uv venv

* record videos

* fix dependencies

* change solver, add more options to naoxml

* fix dependencies (really)

* add nao_standup env

* add test script

* fix dependencies (really2)

* improve nao env

* use nao in scripts

* use face-down keyframe for standup

* improve naming

* tune motors

* add interactive script, fix joint ranges

* limit arms

* add tensorboard, better video naming

* improve feet, improve body

* add wandb

* add site head_center and use in reward

* add power-unlimited nao

* use bhuman model, update env

Co-authored-by: Arne Hasselbring <[email protected]>

* add face-down keyframe

* add site head_center

* fix reward xpos

* fix reward xpos

* use named access for site head_center

* rename env

* remove wrong comments

* update readme

* remove redundant model

* Add interactive_viewer

Co-authored-by: oleflb <[email protected]>
Co-authored-by: chatgpt <chatgpt>

* Use external model

Co-authored-by: oleflb <[email protected]>

* update readme, add gitignore for video and run files

* remove jupyter notebook

* change neovim to dev-dependency

* add entry_point of nao_env in test_script

* Add glfw dependency to readme

Co-authored-by: Alexander Schmidt <[email protected]>

* remove test_script

* remove ruff stuff and sort imports

---------

Co-authored-by: Maximilian Schmidt <[email protected]>
Co-authored-by: Arne Hasselbring <[email protected]>
Co-authored-by: oleflb <[email protected]>
Co-authored-by: Alexander Schmidt <[email protected]>
  • Loading branch information
5 people authored Dec 3, 2024
1 parent 1559bc1 commit 6784696
Show file tree
Hide file tree
Showing 9 changed files with 1,265 additions and 804 deletions.
604 changes: 182 additions & 422 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions tools/machine-learning/mujoco/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
videos/
runs/
20 changes: 20 additions & 0 deletions tools/machine-learning/mujoco/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Setup

Make sure `glfw` is installed on your machine.

For Python, use [uv](https://docs.astral.sh/uv/).
After installing uv, run `uv sync` to install all Python dependencies, or directly run one of the examples below (`uv run` syncs the environment first).

## Example usage

To view the model:

- `uv run interactive_viewer.py`

To train the standup task:

- `uv run standup.py`

## To build a custom NAO environment

Add a new `MujocoEnv` subclass in the `nao_env` folder and import it in `nao_env/__init__.py`.
13 changes: 13 additions & 0 deletions tools/machine-learning/mujoco/interactive_viewer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import os

import mujoco
from mujoco import viewer

# Request the EGL backend for MuJoCo rendering.
# NOTE(review): this assignment runs after `import mujoco`; confirm that the
# variable is still honoured when it is set this late.
os.environ.update(MUJOCO_GL="egl")

# The scene file is looked up relative to the current working directory.
SCENE_XML = "model/scene.xml"

model = mujoco.MjModel.from_xml_path(SCENE_XML)
data = mujoco.MjData(model)

# To start from a stored keyframe instead of the default state, enable:
# mujoco.mj_resetDataKeyframe(model, data, 2)

# Hand model and data over to the interactive viewer.
viewer.launch(model, data)
6 changes: 6 additions & 0 deletions tools/machine-learning/mujoco/nao_env/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv # noqa: F401

# ^^^^^ so that user gets the correct error
# message if mujoco is not installed correctly

from nao_env.nao_standup import NaoStandup
132 changes: 132 additions & 0 deletions tools/machine-learning/mujoco/nao_env/nao_standup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
from pathlib import Path

import numpy as np
from gymnasium import utils
from gymnasium.envs.mujoco.mujoco_env import MujocoEnv
from gymnasium.spaces import Box

# Camera settings passed to ``MujocoEnv`` as ``default_camera_config``.
DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 1,
    "distance": 4.0,
    "lookat": np.array([0.0, 0.0, 0.8925]),
    "elevation": -20.0,
}


class NaoStandup(MujocoEnv, utils.EzPickle):
    """MuJoCo environment in which the NAO model starts face down and is rewarded for rising.

    Each reset places the model in a hard-coded face-down configuration with
    small uniform noise on positions and velocities.  The per-step reward grows
    with the height of the ``head_center`` site and is reduced by a control
    penalty and a (capped) external-force penalty.  The environment itself never
    signals termination or truncation.
    """

    metadata = {
        "render_modes": ["human", "rgb_array", "depth_array"],
    }

    # Start configuration (qpos) for the face-down pose.  Noise of size
    # ``model.nq`` is added to it on reset, so it must have ``model.nq`` entries.
    # NOTE(review): the first seven values look like root position (3) plus a
    # unit quaternion (4) — confirm against the model.
    _FACE_DOWN_QPOS = (
        0.452845,
        0.219837,
        0.0556939,
        0.710551,
        -0.0810676,
        0.693965,
        0.0834173,
        -0.000571484,
        0.0239414,
        0.000401842,
        -3.89047e-05,
        -0.00175077,
        0.357233,
        0.0114063,
        0.000212495,
        0.000422366,
        3.92127e-05,
        -0.00133669,
        0.356939,
        0.0112884,
        -0.000206283,
        1.46985,
        0.110264,
        0.000766453,
        -0.034298,
        3.65047e-05,
        1.47067,
        -0.110094,
        -0.00201064,
        0.0342998,
        -0.00126886,
    )

    def __init__(self, **kwargs) -> None:
        """Load ``model/scene.xml`` from the current working directory.

        All keyword arguments are forwarded unchanged to ``MujocoEnv`` and to
        ``EzPickle``.
        """
        scene_xml = Path.cwd() / "model" / "scene.xml"
        unbounded_box = Box(
            low=-np.inf,
            high=np.inf,
            shape=(661,),
            dtype=np.float64,
        )

        MujocoEnv.__init__(
            self,
            str(scene_xml),
            5,  # second positional argument of MujocoEnv (frame skip)
            observation_space=unbounded_box,
            default_camera_config=DEFAULT_CAMERA_CONFIG,
            **kwargs,
        )
        utils.EzPickle.__init__(self, **kwargs)

    def _get_obs(self) -> np.ndarray:
        """Return the simulator state flattened into one observation vector."""
        sim = self.data
        parts = (
            sim.qpos.flat[2:],  # the first two qpos entries are left out
            sim.qvel.flat,
            sim.cinert.flat,
            sim.cvel.flat,
            sim.qfrc_actuator.flat,
            sim.cfrc_ext.flat,
        )
        return np.concatenate(parts)

    def step(self, a):
        """Apply action ``a`` for ``frame_skip`` simulation steps.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where both
            flags are always ``False`` and ``info`` holds the individual
            reward terms.
        """
        self.do_simulation(a, self.frame_skip)
        sim = self.data

        # Height of the head site, measured from z = 0 and scaled by the timestep.
        site_index = self.model.site("head_center").id
        head_height = sim.site_xpos[site_index][2]
        uph_cost = head_height / self.model.opt.timestep

        ctrl_penalty = 0.1 * np.square(sim.ctrl).sum()
        # The external-force penalty is capped at 10.
        impact_penalty = min(0.5e-6 * np.square(sim.cfrc_ext).sum(), 10)
        reward = uph_cost - ctrl_penalty - impact_penalty + 1

        if self.render_mode == "human":
            self.render()

        observation = self._get_obs()
        info = {
            "reward_linup": uph_cost,
            "reward_quadctrl": -ctrl_penalty,
            "reward_impact": -impact_penalty,
        }
        return observation, reward, False, False, info

    def reset_model(self):
        """Put the model into the noisy face-down pose and return the observation."""
        noise_bound = 0.03

        def jitter(count):
            # Uniform noise in [-noise_bound, noise_bound) with ``count`` entries.
            return self.np_random.uniform(
                low=-noise_bound,
                high=noise_bound,
                size=count,
            )

        # Position noise is drawn before velocity noise to keep the RNG order.
        start_qpos = np.asarray(self._FACE_DOWN_QPOS) + jitter(self.model.nq)
        start_qvel = self.init_qvel + jitter(self.model.nv)
        self.set_state(start_qpos, start_qvel)
        return self._get_obs()
16 changes: 10 additions & 6 deletions tools/machine-learning/mujoco/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@ description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
"gymnasium[classic-control,mujoco]==0.28.1",
"mujoco>=3.2.4",
"gymnasium[classic-control]",
"ipykernel>=6.29.5",
"mediapy>=1.2.2",
"mujoco>=3.2.4",
"numpy>=2.1.2",
"scipy>=1.14.1",
"moviepy>=1.0.3",
"stable-baselines3>=2.3.2",
"wandb>=0.18.5",
"tensorboard>=2.18.0",
]

[tool.ruff]
Expand Down Expand Up @@ -64,8 +66,10 @@ ignore = [
[tool.ruff.lint.per-file-ignores]
"tests/*" = ["S101", "S603"]

[tool.ruff.lint.isort]
required-imports = ["from __future__ import annotations"]

[tool.uv]
dev-dependencies = ["pytest>=8.3.3", "ruff>=0.7.3"]

dev-dependencies = [
"neovim>=0.3.1",
"pytest>=8.3.3",
"ruff>=0.7.3",
]
102 changes: 86 additions & 16 deletions tools/machine-learning/mujoco/standup.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,93 @@
import gymnasium as gym
import os

import gymnasium as gym
import torch
import wandb
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.utils import get_device
from stable_baselines3.common.vec_env import DummyVecEnv, VecVideoRecorder
from wandb.integration.sb3 import WandbCallback

# When Stable-Baselines3 reports a non-CPU device, make sure the NVIDIA EGL
# vendor (ICD) file exists, writing a minimal JSON body if it is missing.
# NOTE(review): indentation was lost in this view of the file, so it cannot be
# determined here whether the MUJOCO_GL assignment below is guarded by the
# device check or runs unconditionally — confirm against the real file.
# NOTE(review): writing under /usr/share presumably needs elevated privileges
# and an existing parent directory; no error handling is visible — confirm.
if get_device() != torch.device("cpu"):
NVIDIA_ICD_CONFIG_PATH = "/usr/share/glvnd/egl_vendor.d/10_nvidia.json"
# Never overwrite a file that is already present.
if not os.path.exists(NVIDIA_ICD_CONFIG_PATH):
with open(NVIDIA_ICD_CONFIG_PATH, "w") as f:
_ = f.write("""{
"file_format_version" : "1.0.0",
"ICD" : {
"library_path" : "libEGL_nvidia.so.0"
}
}""")

# Configure MuJoCo to use the EGL rendering backend (requires GPU)
os.environ["MUJOCO_GL"] = "egl"


# taken from https://gymnasium.farama.org/main/_modules/gymnasium/wrappers/record_video/
def capped_cubic_video_schedule(episode_id: int) -> bool:
    """Decide whether a recording should start at the given index.

    Below 10000 the schedule fires at perfect cubes
    (0, 1, 8, 27, ..., :math:`k^3`, ..., 9261); from 10000 onwards it fires at
    every multiple of 10000 (10000, 20000, 30000, ...).

    NOTE(review): in this file the function is passed to ``VecVideoRecorder``
    as ``record_video_trigger``, which per the Stable-Baselines3 documentation
    receives the current step count rather than an episode number — confirm
    that this is the intended unit.

    Args:
        episode_id: The index to test. Negative values never trigger.

    Returns:
        ``True`` if a recording should start at this index, ``False`` otherwise.
    """
    cap = 10000

    # A negative base with a fractional exponent yields a complex number, which
    # ``round`` rejects; negative indices simply never trigger.
    if episode_id < 0:
        return False
    if episode_id >= cap:
        return episode_id % cap == 0

    cube_root = round(episode_id ** (1.0 / 3))
    return cube_root**3 == episode_id


# Register the custom environment so that ``gym.make`` can build it by id.
# Episodes are limited to 2500 steps.
gym.register(
    id="NaoStandup-v1",
    entry_point="nao_env:NaoStandup",
    max_episode_steps=2500,
)

# Run settings; also handed to wandb as the run configuration.
config = {
    "policy_type": "MlpPolicy",
    "total_timesteps": 1_000_000,
    "env_name": "NaoStandup-v1",
    "render_mode": "rgb_array",
}


env = gym.make("CartPole-v1", render_mode="human")
# Create the Weights & Biases run whose id names the video, tensorboard and
# model output folders further down.
# NOTE(review): ``mode="disabled"`` presumably turns off all wandb syncing —
# confirm that this is meant to be the committed default.
run = wandb.init(
    mode="disabled",
    project="nao_standup",
    config=config,
    save_code=False,
    monitor_gym=True,
    sync_tensorboard=True,
)

model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10_000)

vec_env = model.get_env()
if vec_env is None:
raise ValueError("Model does not have a VecEnv")
def make_env():
    """Build the configured environment wrapped in ``Monitor``.

    The ``Monitor`` wrapper records statistics such as episode returns.
    """
    raw_env = gym.make(config["env_name"], render_mode=config["render_mode"])
    return Monitor(raw_env)

obs = vec_env.reset()
for i in range(1000):
action, _states = model.predict(obs, deterministic=True)
obs, reward, done, info = vec_env.step(action)
vec_env.render()
# VecEnv resets automatically
# if done:
# obs = env.reset()

env.close()
# Wrap the single environment in a VecEnv and record 200-step videos whenever
# the trigger function fires.
env = DummyVecEnv([make_env])
env = VecVideoRecorder(
    env,
    f"videos/{run.id}",
    record_video_trigger=capped_cubic_video_schedule,
    video_length=200,
)
model = PPO(
    config["policy_type"], env, verbose=1, tensorboard_log=f"runs/{run.id}"
)
try:
    model.learn(
        total_timesteps=config["total_timesteps"],
        callback=WandbCallback(
            gradient_save_freq=100,
            model_save_path=f"models/{run.id}",
            verbose=2,
        ),
    )
finally:
    # Close the recorder and the wrapped environment even when training is
    # interrupted, so a video that is still being recorded gets finalised and
    # the simulator/renderer resources are released.
    env.close()
run.finish()
Loading

0 comments on commit 6784696

Please sign in to comment.