From 9670411c5d72a62a7ac51de0656b1cb5edb39050 Mon Sep 17 00:00:00 2001
From: ariel
Date: Fri, 23 Aug 2024 17:00:24 +0200
Subject: [PATCH] Remove all mentions of reward_range

---
 .../gymnasium_basics/implementing_custom_wrappers.py | 5 ++---
 gymnasium/core.py                                    | 6 ++----
 gymnasium/envs/functional_jax_env.py                 | 4 ----
 3 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py b/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py
index 4ba67720a..be76ba4b5 100644
--- a/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py
+++ b/docs/tutorials/gymnasium_basics/implementing_custom_wrappers.py
@@ -82,7 +82,7 @@ def action(self, act):
 # ------------------------------------------------
 # Reward wrappers are used to transform the reward that is returned by an environment.
 # As for the previous wrappers, you need to specify that transformation by implementing the
-# :meth:`gymnasium.RewardWrapper.reward` method. Also, you might want to update the reward range of the wrapper.
+# :meth:`gymnasium.RewardWrapper.reward` method.
 #
 # Let us look at an example: Sometimes (especially when we do not have control over the reward
 # because it is intrinsic), we want to clip the reward to a range to gain some numerical stability.
@@ -96,7 +96,6 @@ def __init__(self, env, min_reward, max_reward):
         super().__init__(env)
         self.min_reward = min_reward
         self.max_reward = max_reward
-        self.reward_range = (min_reward, max_reward)
 
     def reward(self, r: SupportsFloat) -> SupportsFloat:
         return np.clip(r, self.min_reward, self.max_reward)
@@ -110,7 +109,7 @@ def reward(self, r: SupportsFloat) -> SupportsFloat:
 # Such wrappers can be implemented by inheriting from :class:`gymnasium.Wrapper`.
 #
 # - You can set a new action or observation space by defining ``self.action_space`` or ``self.observation_space`` in ``__init__``, respectively
-# - You can set new metadata and reward range by defining ``self.metadata`` and ``self.reward_range`` in ``__init__``, respectively
+# - You can set new metadata by defining ``self.metadata`` in ``__init__``
 # - You can override :meth:`gymnasium.Wrapper.step`, :meth:`gymnasium.Wrapper.render`, :meth:`gymnasium.Wrapper.close` etc.
 #
 # If you do this, you can access the environment that was passed
diff --git a/gymnasium/core.py b/gymnasium/core.py
index 1e6403950..96207e5e4 100644
--- a/gymnasium/core.py
+++ b/gymnasium/core.py
@@ -289,8 +289,8 @@ class Wrapper(
     """Wraps a :class:`gymnasium.Env` to allow a modular transformation of the :meth:`step` and :meth:`reset` methods.
 
     This class is the base class of all wrappers to change the behavior of the underlying environment.
-    Wrappers that inherit from this class can modify the :attr:`action_space`, :attr:`observation_space`,
-    :attr:`reward_range` and :attr:`metadata` attributes, without changing the underlying environment's attributes.
+    Wrappers that inherit from this class can modify the :attr:`action_space`, :attr:`observation_space`
+    and :attr:`metadata` attributes, without changing the underlying environment's attributes.
     Moreover, the behavior of the :meth:`step` and :meth:`reset` methods can be changed by these wrappers.
 
     Some attributes (:attr:`spec`, :attr:`render_mode`, :attr:`np_random`) will point back to the wrapper's environment
@@ -568,8 +568,6 @@ class RewardWrapper(Wrapper[ObsType, ActType, ObsType, ActType]):
     If you would like to apply a function to the reward that is returned by the base environment before
     passing it to learning code, you can simply inherit from :class:`RewardWrapper` and overwrite the method
     :meth:`reward` to implement that transformation.
-    This transformation might change the :attr:`reward_range`; to specify the :attr:`reward_range` of your wrapper,
-    you can simply define :attr:`self.reward_range` in :meth:`__init__`.
     """
 
     def __init__(self, env: Env[ObsType, ActType]):
diff --git a/gymnasium/envs/functional_jax_env.py b/gymnasium/envs/functional_jax_env.py
index 1c3c32576..1db7a0281 100644
--- a/gymnasium/envs/functional_jax_env.py
+++ b/gymnasium/envs/functional_jax_env.py
@@ -26,7 +26,6 @@ def __init__(
         func_env: FuncEnv,
         metadata: dict[str, Any] | None = None,
         render_mode: str | None = None,
-        reward_range: tuple[float, float] = (-float("inf"), float("inf")),
         spec: EnvSpec | None = None,
     ):
         """Initialize the environment from a FuncEnv."""
@@ -41,7 +40,6 @@
 
         self.metadata = metadata
         self.render_mode = render_mode
-        self.reward_range = reward_range
 
         self.spec = spec
 
@@ -112,7 +110,6 @@ def __init__(
         max_episode_steps: int = 0,
         metadata: dict[str, Any] | None = None,
         render_mode: str | None = None,
-        reward_range: tuple[float, float] = (-float("inf"), float("inf")),
         spec: EnvSpec | None = None,
     ):
         """Initialize the environment from a FuncEnv."""
@@ -131,7 +128,6 @@
 
         self.metadata = metadata
         self.render_mode = render_mode
-        self.reward_range = reward_range
 
         self.spec = spec
         self.time_limit = max_episode_steps
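
Usage note: after this patch, a reward wrapper only needs to override
:meth:`reward`; nothing assigns or reads a reward_range attribute anymore.
Below is a minimal sketch of the surviving pattern, mirroring the tutorial's
clipping example. It assumes gymnasium is installed; "CartPole-v1" is only an
illustrative environment, not part of the patch.

    from typing import SupportsFloat

    import gymnasium as gym
    import numpy as np


    class ClipReward(gym.RewardWrapper):
        """Clip each step's reward into [min_reward, max_reward]."""

        def __init__(self, env, min_reward, max_reward):
            super().__init__(env)
            self.min_reward = min_reward
            self.max_reward = max_reward
            # Note: no self.reward_range assignment, per this patch.

        def reward(self, r: SupportsFloat) -> SupportsFloat:
            # Clipping intrinsic rewards improves numerical stability.
            return np.clip(r, self.min_reward, self.max_reward)


    env = ClipReward(gym.make("CartPole-v1"), -1.0, 1.0)
    obs, info = env.reset(seed=0)
    # The reward returned here has been clipped by the wrapper.
    obs, reward, terminated, truncated, info = env.step(env.action_space.sample())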