
Commit 4d15c99

Remove reward_range (#1167)
1 parent b40a899 commit 4d15c99

3 files changed: +4 -11 lines

3 files changed

+4
-11
lines changed

docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py

+2 -3
@@ -82,7 +82,7 @@ def action(self, act):
 # ------------------------------------------------
 # Reward wrappers are used to transform the reward that is returned by an environment.
 # As for the previous wrappers, you need to specify that transformation by implementing the
-# :meth:`gymnasium.RewardWrapper.reward` method. Also, you might want to update the reward range of the wrapper.
+# :meth:`gymnasium.RewardWrapper.reward` method.
 #
 # Let us look at an example: Sometimes (especially when we do not have control over the reward
 # because it is intrinsic), we want to clip the reward to a range to gain some numerical stability.
@@ -96,7 +96,6 @@ def __init__(self, env, min_reward, max_reward):
         super().__init__(env)
         self.min_reward = min_reward
         self.max_reward = max_reward
-        self.reward_range = (min_reward, max_reward)

     def reward(self, r: SupportsFloat) -> SupportsFloat:
         return np.clip(r, self.min_reward, self.max_reward)
@@ -110,7 +109,7 @@ def reward(self, r: SupportsFloat) -> SupportsFloat:
 # Such wrappers can be implemented by inheriting from :class:`gymnasium.Wrapper`.
 #
 # - You can set a new action or observation space by defining ``self.action_space`` or ``self.observation_space`` in ``__init__``, respectively
-# - You can set new metadata and reward range by defining ``self.metadata`` and ``self.reward_range`` in ``__init__``, respectively
+# - You can set new metadata by defining ``self.metadata`` in ``__init__``
 # - You can override :meth:`gymnasium.Wrapper.step`, :meth:`gymnasium.Wrapper.render`, :meth:`gymnasium.Wrapper.close` etc.
 #
 # If you do this, you can access the environment that was passed
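For context, the tutorial's clipping wrapper reads as follows after this change. This is only a sketch assembled from the hunks above: the class name ClipReward and the import lines are assumptions, since the diff shows just the constructor and reward bodies.

from typing import SupportsFloat

import numpy as np
import gymnasium as gym


class ClipReward(gym.RewardWrapper):
    """Clip the reward returned by the wrapped env to [min_reward, max_reward]."""

    def __init__(self, env, min_reward, max_reward):
        super().__init__(env)
        self.min_reward = min_reward
        self.max_reward = max_reward
        # The former `self.reward_range = (min_reward, max_reward)` line is gone,
        # since this commit removes the attribute entirely.

    def reward(self, r: SupportsFloat) -> SupportsFloat:
        # Clip the raw reward from the wrapped environment into the target range.
        return np.clip(r, self.min_reward, self.max_reward)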

gymnasium/core.py

+2 -4
@@ -289,8 +289,8 @@ class Wrapper(
     """Wraps a :class:`gymnasium.Env` to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.

     This class is the base class of all wrappers to change the behavior of the underlying environment.
-    Wrappers that inherit from this class can modify the :attr:`action_space`, :attr:`observation_space`,
-    :attr:`reward_range` and :attr:`metadata` attributes, without changing the underlying environment's attributes.
+    Wrappers that inherit from this class can modify the :attr:`action_space`, :attr:`observation_space`
+    and :attr:`metadata` attributes, without changing the underlying environment's attributes.
     Moreover, the behavior of the :meth:`step` and :meth:`reset` methods can be changed by these wrappers.

     Some attributes (:attr:`spec`, :attr:`render_mode`, :attr:`np_random`) will point back to the wrapper's environment
@@ -568,8 +568,6 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
     If you would like to apply a function to the reward that is returned by the base environment before
     passing it to learning code, you can simply inherit from :class:`RewardWrapper` and overwrite the method
     :meth:`reward` to implement that transformation.
-    This transformation might change the :attr:`reward_range`; to specify the :attr:`reward_range` of your wrapper,
-    you can simply define :attr:`self.reward_range` in :meth:`__init__`.
     """

     def __init__(self, env: Env[ObsType, ActType]):
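The updated :class:`Wrapper` docstring now lists only :attr:`action_space`, :attr:`observation_space` and :attr:`metadata` as the attributes a wrapper may override. A minimal, hypothetical sketch of such a wrapper (the class name and the rescaling logic are illustrative, not part of this commit) might look like:

import gymnasium as gym
from gymnasium.spaces import Box


class UnitBoxObservation(gym.Wrapper):
    """Hypothetical wrapper that rescales bounded Box observations to [0, 1]."""

    def __init__(self, env):
        super().__init__(env)
        assert isinstance(env.observation_space, Box), "sketch assumes a bounded Box space"
        self._low = env.observation_space.low
        self._high = env.observation_space.high
        # New observation space, declared in __init__ as the docstring describes.
        self.observation_space = Box(low=0.0, high=1.0, shape=env.observation_space.shape)
        # New metadata, also declared in __init__; there is no reward_range to update anymore.
        self.metadata = {**env.metadata, "rescaled_obs": True}

    def reset(self, **kwargs):
        obs, info = self.env.reset(**kwargs)
        return self._rescale(obs), info

    def step(self, action):
        obs, reward, terminated, truncated, info = self.env.step(action)
        return self._rescale(obs), reward, terminated, truncated, info

    def _rescale(self, obs):
        # Map each observation component from [low, high] to [0, 1].
        return (obs - self._low) / (self._high - self._low)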

gymnasium/envs/functional_jax_env.py

-4
@@ -26,7 +26,6 @@ def __init__(
         func_env: FuncEnv,
         metadata: dict[str, Any] | None = None,
         render_mode: str | None = None,
-        reward_range: tuple[float, float] = (-float("inf"), float("inf")),
         spec: EnvSpec | None = None,
     ):
         """Initialize the environment from a FuncEnv."""
@@ -41,7 +40,6 @@ def __init__(

         self.metadata = metadata
         self.render_mode = render_mode
-        self.reward_range = reward_range

         self.spec = spec

@@ -112,7 +110,6 @@ def __init__(
         max_episode_steps: int = 0,
         metadata: dict[str, Any] | None = None,
         render_mode: str | None = None,
-        reward_range: tuple[float, float] = (-float("inf"), float("inf")),
         spec: EnvSpec | None = None,
     ):
         """Initialize the environment from a FuncEnv."""
@@ -131,7 +128,6 @@ def __init__(

         self.metadata = metadata
         self.render_mode = render_mode
-        self.reward_range = reward_range
         self.spec = spec
         self.time_limit = max_episode_steps
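On the functional JAX side the change is purely a constructor cleanup: reward_range disappears from the parameter lists and from the attribute assignments, so callers simply stop passing it. A rough usage sketch, assuming the class defined in this module is FunctionalJaxEnv and that MyFuncEnv stands in for some concrete FuncEnv implementation (both names are assumptions, not shown in the diff):

from gymnasium.envs.functional_jax_env import FunctionalJaxEnv

func_env = MyFuncEnv()  # placeholder for any concrete FuncEnv subclass

env = FunctionalJaxEnv(
    func_env,
    metadata={"render_modes": []},
    render_mode=None,
    # reward_range=(-float("inf"), float("inf")),  # no longer accepted after this commit
)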

0 commit comments

Comments
 (0)