From a357475a903a7c7fc783c511c9dee92f54eb1480 Mon Sep 17 00:00:00 2001 From: Joseph Date: Fri, 20 Oct 2023 15:55:48 -0400 Subject: [PATCH] log reward clipping is now -100 (much smaller) --- src/gfn/env.py | 4 ++-- src/gfn/gym/box.py | 2 +- src/gfn/gym/discrete_ebm.py | 2 +- src/gfn/gym/hypergrid.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gfn/env.py b/src/gfn/env.py index 18f2af7b..c16d2018 100644 --- a/src/gfn/env.py +++ b/src/gfn/env.py @@ -23,7 +23,7 @@ def __init__( sf: Optional[TT["state_shape", torch.float]] = None, device_str: Optional[str] = None, preprocessor: Optional[Preprocessor] = None, - log_reward_clip: Optional[float] = -20.0, + log_reward_clip: Optional[float] = -100., ): """Initializes an environment. @@ -221,7 +221,7 @@ def __init__( sf: Optional[TT["state_shape", torch.float]] = None, device_str: Optional[str] = None, preprocessor: Optional[Preprocessor] = None, - log_reward_clip: Optional[float] = -20.0, + log_reward_clip: Optional[float] = -100.0, ): """Initializes a discrete environment. diff --git a/src/gfn/gym/box.py b/src/gfn/gym/box.py index 414f0389..28eb0893 100644 --- a/src/gfn/gym/box.py +++ b/src/gfn/gym/box.py @@ -20,7 +20,7 @@ def __init__( R2: float = 2.0, epsilon: float = 1e-4, device_str: Literal["cpu", "cuda"] = "cpu", - log_reward_clip: float = -20., + log_reward_clip: float = -100., ): assert 0 < delta <= 1, "delta must be in (0, 1]" self.delta = delta diff --git a/src/gfn/gym/discrete_ebm.py b/src/gfn/gym/discrete_ebm.py index 3bc1a2aa..7839c568 100644 --- a/src/gfn/gym/discrete_ebm.py +++ b/src/gfn/gym/discrete_ebm.py @@ -48,7 +48,7 @@ def __init__( alpha: float = 1.0, device_str: Literal["cpu", "cuda"] = "cpu", preprocessor_name: Literal["Identity", "Enum"] = "Identity", - log_reward_clip: float = -20., + log_reward_clip: float = -100., ): """Discrete EBM environment. diff --git a/src/gfn/gym/hypergrid.py b/src/gfn/gym/hypergrid.py index 882cc5c5..0ddfc2c9 100644 --- a/src/gfn/gym/hypergrid.py +++ b/src/gfn/gym/hypergrid.py @@ -25,7 +25,7 @@ def __init__( reward_cos: bool = False, device_str: Literal["cpu", "cuda"] = "cpu", preprocessor_name: Literal["KHot", "OneHot", "Identity", "Enum"] = "KHot", - log_reward_clip: float = -20., + log_reward_clip: float = -100., ): """HyperGrid environment from the GFlowNets paper. The states are represented as 1-d tensors of length `ndim` with values in