fixed default hyperparameters

qfettes · Jun 19, 2018 · fe3d2d2 · fe3d2d2
1 parent 296c7ad
commit fe3d2d2
Show file tree

Hide file tree

Showing 2 changed files with 57 additions and 13 deletions.
diff --git a/devel.py b/devel.py
@@ -14,6 +14,51 @@
 from utils.hyperparameters import Config
 from agents.Rainbow import Model
 
+config = Config()
+
+#algorithm control
+config.USE_NOISY_NETS=False
+config.USE_PRIORITY_REPLAY=False
+
+#Multi-step returns
+config.N_STEPS = 1
+
+#epsilon variables
+config.epsilon_start = 1.0
+config.epsilon_final = 0.01
+config.epsilon_decay = 500
+config.epsilon_by_frame = lambda frame_idx: config.epsilon_final + (config.epsilon_start - config.epsilon_final) * math.exp(-1. * frame_idx / config.epsilon_decay)
+
+#misc agent variables
+config.GAMMA=0.99
+config.LR=1e-4
+
+#memory
+config.TARGET_NET_UPDATE_FREQ = 128
+config.EXP_REPLAY_SIZE = 10000
+config.BATCH_SIZE = 32
+config.PRIORITY_ALPHA=0.6
+config.PRIORITY_BETA_START=0.4
+config.PRIORITY_BETA_FRAMES = 100000
+
+#Noisy Nets
+config.SIGMA_INIT=0.5
+
+#Learning control variables
+config.LEARN_START = config.BATCH_SIZE*2
+config.MAX_FRAMES=100000
+
+#Categorical Params
+config.ATOMS = 51
+config.V_MAX = 50
+config.V_MIN = 0
+
+#Quantile Regression Parameters
+config.QUANTILES=21
+
+#DRQN Parameters
+config.SEQUENCE_LENGTH=8
+
 
 def plot(frame_idx, rewards, losses, elapsed_time):
     #clear_output(True)
@@ -37,7 +82,6 @@ def plot(frame_idx, rewards, losses, elapsed_time):
     env    = wrap_pytorch(env)'''
     env = gym.make('CartPole-v0')
     #env = wrappers.Monitor(env, 'Delete', force=True)
-    config = Config()
     model = Model(env=env, config=config)
 
     losses = []

diff --git a/utils/hyperparameters.py b/utils/hyperparameters.py
@@ -16,38 +16,38 @@ def __init__(self):
         #epsilon variables
         self.epsilon_start = 1.0
         self.epsilon_final = 0.01
-        self.epsilon_decay = 500
+        self.epsilon_decay = 30000
         self.epsilon_by_frame = lambda frame_idx: self.epsilon_final + (self.epsilon_start - self.epsilon_final) * math.exp(-1. * frame_idx / self.epsilon_decay)
 
         #misc agent variables
         self.GAMMA=0.99
         self.LR=1e-4
 
         #memory
-        self.TARGET_NET_UPDATE_FREQ = 256
-        self.EXP_REPLAY_SIZE = 10000
-        self.BATCH_SIZE = 128
-        self.PRIORITY_ALPHA=0.3
+        self.TARGET_NET_UPDATE_FREQ = 1000
+        self.EXP_REPLAY_SIZE = 100000
+        self.BATCH_SIZE = 32
+        self.PRIORITY_ALPHA=0.6
         self.PRIORITY_BETA_START=0.4
-        self.PRIORITY_BETA_FRAMES = 1000
+        self.PRIORITY_BETA_FRAMES = 100000
 
         #Noisy Nets
-        self.SIGMA_INIT=0.2
+        self.SIGMA_INIT=0.5
 
         #Learning control variables
-        self.LEARN_START = self.BATCH_SIZE*2
+        self.LEARN_START = 10000
         self.MAX_FRAMES=100000
 
         #Categorical Params
         self.ATOMS = 51
-        self.V_MAX = 50
-        self.V_MIN = 0
+        self.V_MAX = 10
+        self.V_MIN = -10
 
         #Quantile Regression Parameters
-        self.QUANTILES=11
+        self.QUANTILES=51
 
         #DRQN Parameters
-        self.SEQUENCE_LENGTH=10
+        self.SEQUENCE_LENGTH=8
 
 
 '''