diff --git a/src/jaxgym/envs/ergocub.py b/src/jaxgym/envs/ergocub.py
index c3d45457c..769327b55 100644
--- a/src/jaxgym/envs/ergocub.py
+++ b/src/jaxgym/envs/ergocub.py
@@ -3,7 +3,7 @@
 import multiprocessing
 import os
 import warnings
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type, Union
 
 import gymnasium as gym
 import jax.numpy as jnp
@@ -17,7 +17,6 @@
 from resolve_robotics_uri_py import resolve_robotics_uri
 from stable_baselines3 import PPO
 from stable_baselines3.common import vec_env as vec_env_sb
-from stable_baselines3.common.env_util import make_vec_env
 from stable_baselines3.common.vec_env import VecMonitor, VecNormalize
 from torch import nn
 
@@ -241,7 +240,7 @@ def jaxsim(self) -> JaxSim:
     def initial(self, rng: Any = None) -> StateType:
         """"""
 
-        assert jax.dtypes.issubdtype(rng, jax.dtypes.prng_key)
+        # TODO: assert jax.dtypes.issubdtype(rng, jax.dtypes.prng_key)
 
         # Split the key
         subkey1, subkey2 = jax.random.split(rng, num=2)
@@ -281,10 +280,10 @@ def initial(self, rng: Any = None) -> StateType:
         )
 
         # Return the simulation state
-        return dict(
-            simulator_data=simulator.data,
-            goal=jnp.array(goal_xy_position, dtype=float),
-        )
+        return {
+            "simulator_data": simulator.data,
+            "goal": jnp.array(goal_xy_position, dtype=float),
+        }
 
     def transition(
         self, state: StateType, action: ActType, rng: Any = None
@@ -309,20 +308,19 @@ def pre_step(self, sim: JaxSim) -> JaxSim:
                     forces=jnp.atleast_1d(action), joint_names=model.joint_names()
                 )
 
-            return sim
+            return sim, None
 
         number_of_integration_steps = 40  # 0.010 # TODO 20 for having 0.010
 
         # Stepping logic
-        with simulator.editable(validate=True) as simulator:
-            simulator, _ = simulator.step_over_horizon(
-                horizon_steps=number_of_integration_steps,
-                clear_inputs=False,
-                callback_handler=SetTorquesOverHorizon(),
-            )
+        simulator, _ = simulator.step_over_horizon(
+            horizon_steps=number_of_integration_steps,
+            clear_inputs=False,
+            callback_handler=SetTorquesOverHorizon(),
+        )
 
         # Return the new environment state (updated SimulatorData)
-        return state | dict(simulator_data=simulator.data)
+        return state | {"simulator_data": simulator.data}
 
     def observation(self, state: StateType) -> ObsType:
         """"""
@@ -353,7 +351,9 @@
             base_linear_velocity=model.base_velocity()[0:3],
            base_angular_velocity=model.base_velocity()[3:6],
             contact_state=model.in_contact(
-                link_names=[name for name in model.link_names() if "_ankle" in name]
+                link_names=tuple(
+                    name for name in model.link_names() if "_ankle" in name
+                )
             ),
         )
 
@@ -383,11 +383,11 @@
         # reward += 100.0 * v_WB[0]  # forward velocity
         reward -= jnp.linalg.norm(W_p_B[0:2] - W_p_xy_goal)  # distance from goal
         reward += 1.0 * model_next.in_contact(
-            link_names=[
-                name
-                for name in model_next.link_names()
-                if name.startswith("leg_") and name.endswith("_lower")
-            ]
+            link_names=tuple(
+                name
+                for name in model_next.link_names()
+                if name.startswith("leg_") and name.endswith("_lower")
+            )
         ).any().astype(float)
 
         reward -= 0.1 * jnp.linalg.norm(action) / action.size  # control cost
@@ -479,13 +479,6 @@ class ErgoCubWalkEnvV0(JaxEnv):
     def __init__(self, render_mode: str | None = None, **kwargs: Any) -> None:
         """"""
 
-        from jaxgym.wrappers.jax import (
-            ClipActionWrapper,
-            FlattenSpacesWrapper,
-            JaxTransformWrapper,
-            TimeLimit,
-        )
-
         func_env = ErgoCubWalkFuncEnvV0()
 
         func_env_wrapped = func_env
@@ -506,7 +499,7 @@ def __init__(self, render_mode: str | None = None, **kwargs: Any) -> None:
 class ErgoCubWalkVectorEnvV0(JaxVectorEnv):
     """"""
 
-    metadata = dict()
+    metadata = {}
 
     def __init__(
         self,
@@ -549,8 +542,6 @@ def make_jax_env(
 ) -> JaxEnv:
     """"""
 
-    # TODO: single env -> time limit with stable_baselines?
-
     if max_episode_steps in {None, 0}:
         env = ErgoCubWalkFuncEnvV0()
     else:
@@ -630,10 +621,10 @@ def tree_inverse_transpose(
-    ) -> List[jtp.PyTree]:
+    ) -> Tuple[jtp.PyTree, ...]:
         """"""
 
-        return [
+        return tuple(
             jax.tree_util.tree_map(lambda leaf: leaf[i], pytree)
             for i in range(batch_size)
-        ]
+        )
 
     def step_wait(self) -> vec_env_sb.base_vec_env.VecEnvStepReturn:
         """"""
@@ -717,7 +708,7 @@ def seed(self, seed: Optional[int] = None) -> List[Union[None, int]]:
             seed = np.random.default_rng().integers(0, 2 ** 32 - 1, dtype="uint32")
 
         if np.array(seed, dtype="uint32") != np.array(seed):
-            raise ValueError(f"seed must be compatible with 'uint32' casting")
+            raise ValueError("seed must be compatible with 'uint32' casting")
 
         self._seed = seed
         return [seed]
@@ -735,7 +726,7 @@ def make_vec_env_stable_baselines(
 
     env = jax_dataclass_env
 
-    vec_env_kwargs = vec_env_kwargs if vec_env_kwargs is not None else dict()
+    vec_env_kwargs = vec_env_kwargs if vec_env_kwargs is not None else {}
 
     vec_env = JaxVectorEnv(
         func_env=env,
@@ -755,11 +746,11 @@
     os.environ["IGN_GAZEBO_RESOURCE_PATH"] = "/conda/share/"
 
     # DEBUG
-    max_episode_steps = 200
+    MAX_EPISODE_STEPS = 200
 
     func_env = NaNHandlerWrapper(env=ErgoCubWalkFuncEnvV0())
-    if max_episode_steps is not None:
-        func_env = TimeLimit(env=func_env, max_episode_steps=max_episode_steps)
+    if MAX_EPISODE_STEPS is not None:
+        func_env = TimeLimit(env=func_env, max_episode_steps=MAX_EPISODE_STEPS)
 
     func_env = ClipActionWrapper(
         env=SquashActionWrapper(env=ActionNoiseWrapper(env=func_env)),
@@ -767,11 +758,9 @@
 
     vec_env = make_vec_env_stable_baselines(
         jax_dataclass_env=func_env,
-        n_envs=6000,
+        n_envs=256,
         seed=42,
-        vec_env_kwargs=dict(
-            jit_compile=True,
-        ),
+        vec_env_kwargs={"jit_compile": True},
     )
 
     vec_env = VecMonitor(
@@ -798,11 +787,11 @@
         target_kl=0.025,
         verbose=2,
         learning_rate=0.000_300,
-        policy_kwargs=dict(
-            activation_fn=nn.ReLU,
-            net_arch=dict(pi=[512, 512], vf=[512, 512]),
-            log_std_init=np.log(0.05),
-        ),
+        policy_kwargs={
+            "activation_fn": nn.ReLU,
+            "net_arch": {"pi": [512, 512], "vf": [512, 512]},
+            "log_std_init": np.log(0.05),
+        },
     )
 
     print(model.policy)
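
Reviewer note on `tree_inverse_transpose`: the function inverts the batching of a pytree, turning one pytree whose leaves share a leading batch axis into a per-sample sequence. Below is a self-contained sketch of that pattern, assuming only `jax`; the `unbatch_pytree` name and the sample data are illustrative, not part of jaxgym.

```python
# Un-batching ("inverse transpose") of a pytree: one pytree with batched
# leaves becomes a tuple of per-sample pytrees, one per vectorized env.
import jax
import jax.numpy as jnp

def unbatch_pytree(pytree, batch_size):
    # tree_map indexes every leaf at position i, materializing the i-th
    # sample; a tuple conveys a fixed-length, read-only result.
    return tuple(
        jax.tree_util.tree_map(lambda leaf: leaf[i], pytree)
        for i in range(batch_size)
    )

batched = {"obs": jnp.zeros((4, 3)), "reward": jnp.arange(4.0)}
samples = unbatch_pytree(batched, batch_size=4)
assert len(samples) == 4
assert float(samples[2]["reward"]) == 2.0  # one dict per environment
```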