google · btaba · May 20, 2024 · Mar 27, 2024 · Mar 27, 2024 · Mar 27, 2024
diff --git a/brax/envs/base.py b/brax/envs/base.py
@@ -26,6 +26,7 @@
 from brax.spring import pipeline as s_pipeline
 from flax import struct
 import jax
+import jax.numpy as jnp
 import mujoco
 from mujoco import mjx
 import numpy as np
@@ -128,6 +129,22 @@ def f(state, _):
       )
 
     return jax.lax.scan(f, pipeline_state, (), self._n_frames)[0]
+
+  def scale_and_clip_actions(self, action: jax.Array) -> jax.Array:
+    """Scales an action from `[-1, 1]` to each actuator's control limits.
+
+    The action is mapped linearly onto `[a, b]` (the actuator's `ctrl_range`)
+    via `u' = (u + 1) * (b - a) / 2 + a`, then clipped to `[a, b]` so inputs
+    outside `[-1, 1]` never exceed the control limits.
+    """
+    action_min = self.sys.actuator.ctrl_range[:, 0]
+    action_max = self.sys.actuator.ctrl_range[:, 1]
+
+    def rescale(x):
+      return (x + 1) * (action_max - action_min) / 2 + action_min
+
+    # Lower bound must be `action_min`; `[max, max]` would pin every action.
+    return jnp.clip(rescale(action), action_min, action_max)
 
   @property
   def dt(self) -> jax.Array:

diff --git a/brax/envs/humanoid.py b/brax/envs/humanoid.py
@@ -256,6 +256,7 @@ def reset(self, rng: jax.Array) -> State:
   def step(self, state: State, action: jax.Array) -> State:
     """Runs one timestep of the environment's dynamics."""
     pipeline_state0 = state.pipeline_state
+    action = self.scale_and_clip_actions(action)
     pipeline_state = self.pipeline_step(pipeline_state0, action)
 
     com_before, *_ = self._com(pipeline_state0)

diff --git a/brax/envs/humanoidstandup.py b/brax/envs/humanoidstandup.py
@@ -219,6 +219,7 @@ def reset(self, rng: jax.Array) -> State:
 
   def step(self, state: State, action: jax.Array) -> State:
     """Runs one timestep of the environment's dynamics."""
+    action = self.scale_and_clip_actions(action)
     pipeline_state = self.pipeline_step(state.pipeline_state, action)
 
     pos_after = pipeline_state.x.pos[0, 2]  # z coordinate of torso

diff --git a/brax/envs/inverted_double_pendulum.py b/brax/envs/inverted_double_pendulum.py
@@ -46,14 +46,14 @@ class InvertedDoublePendulum(PipelineEnv):
 
   The agent take a 1-element vector for actions.
 
-  The action space is a continuous `(action)` in `[-3, 3]`, where `action`
+  The action space is a continuous `(action)` in `[-1, 1]`, where `action`
   represents the numerical force applied to the cart (with magnitude
   representing the amount of force and sign representing the direction)
 
   | Num | Action                    | Control Min | Control Max | Name (in
   corresponding config) | Joint | Unit      |
   |-----|---------------------------|-------------|-------------|--------------------------------|-------|-----------|
-  | 0   | Force applied on the cart | -3          | 3           | slider
+  | 0   | Force applied on the cart | -1          | 1           | slider
   | slide | Force (N) |
 
   ### Observation Space

diff --git a/brax/envs/inverted_pendulum.py b/brax/envs/inverted_pendulum.py
@@ -46,6 +46,9 @@ class InvertedPendulum(PipelineEnv):
   continuous `(action)` in `[-3, 3]`, where `action` represents the numerical
   force applied to the cart (with magnitude representing the amount of force and
   sign representing the direction)
+
+  Actions are expected to lie in `[-1, 1]`; the environment's `step()` call
+  linearly rescales them to `[-3, 3]` and clips them to that range.
 
   | Num | Action                    | Control Min | Control Max | Name (in
   corresponding config) | Joint | Unit      |
@@ -129,6 +132,7 @@ def reset(self, rng: jax.Array) -> State:
 
   def step(self, state: State, action: jax.Array) -> State:
     """Run one timestep of the environment's dynamics."""
+    action = self.scale_and_clip_actions(action)
     pipeline_state = self.pipeline_step(state.pipeline_state, action)
     obs = self._get_obs(pipeline_state)
     reward = 1.0

diff --git a/brax/envs/pusher.py b/brax/envs/pusher.py
@@ -42,7 +42,10 @@ class Pusher(PipelineEnv):
   ### Action Space
 
   The action space is a `Box(-2, 2, (7,), float32)`. An action `(a, b)`
-  represents the torques applied at the hinge joints.
+  represents the torques applied at the hinge joints. 
+
+  Actions are expected to lie in `[-1, 1]`; the environment's `step()` call
+  linearly rescales them to `[-2, 2]` and clips them to that range.
 
   | Num | Action                                        | Control Min | Control Max | Name (in corresponding config) | Joint | Unit         |
   |-----|-----------------------------------------------|-------------|-------------|--------------------------------|-------|--------------|
@@ -193,6 +196,9 @@ def reset(self, rng: jax.Array) -> State:
     return State(pipeline_state, obs, reward, done, metrics)
 
   def step(self, state: State, action: jax.Array) -> State:
+    action = self.scale_and_clip_actions(action)
+    pipeline_state = self.pipeline_step(state.pipeline_state, action)
+
     assert state.pipeline_state is not None
     x_i = state.pipeline_state.x.vmap().do(
         base.Transform.create(pos=self.sys.link.inertia.transform.pos)
@@ -205,8 +211,6 @@ def step(self, state: State, action: jax.Array) -> State:
     reward_ctrl = -jp.square(action).sum()
     reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
 
-    pipeline_state = self.pipeline_step(state.pipeline_state, action)
-
     obs = self._get_obs(pipeline_state)
     state.metrics.update(
         reward_near=reward_near,