From fe3d2d2e73beb4378b0acf95f8fcbdc9fa10afeb Mon Sep 17 00:00:00 2001 From: quintin Date: Tue, 19 Jun 2018 17:48:14 -0400 Subject: [PATCH] fixed default hyperparameters --- devel.py | 46 +++++++++++++++++++++++++++++++++++++++- utils/hyperparameters.py | 24 ++++++++++----------- 2 files changed, 57 insertions(+), 13 deletions(-) diff --git a/devel.py b/devel.py index 89bf8f1..e4deba7 100644 --- a/devel.py +++ b/devel.py @@ -14,6 +14,51 @@ from utils.hyperparameters import Config from agents.Rainbow import Model +config = Config() + +#algorithm control +config.USE_NOISY_NETS=False +config.USE_PRIORITY_REPLAY=False + +#Multi-step returns +config.N_STEPS = 1 + +#epsilon variables +config.epsilon_start = 1.0 +config.epsilon_final = 0.01 +config.epsilon_decay = 500 +config.epsilon_by_frame = lambda frame_idx: config.epsilon_final + (config.epsilon_start - config.epsilon_final) * math.exp(-1. * frame_idx / config.epsilon_decay) + +#misc agent variables +config.GAMMA=0.99 +config.LR=1e-4 + +#memory +config.TARGET_NET_UPDATE_FREQ = 128 +config.EXP_REPLAY_SIZE = 10000 +config.BATCH_SIZE = 32 +config.PRIORITY_ALPHA=0.6 +config.PRIORITY_BETA_START=0.4 +config.PRIORITY_BETA_FRAMES = 100000 + +#Noisy Nets +config.SIGMA_INIT=0.5 + +#Learning control variables +config.LEARN_START = config.BATCH_SIZE*2 +config.MAX_FRAMES=100000 + +#Categorical Params +config.ATOMS = 51 +config.V_MAX = 50 +config.V_MIN = 0 + +#Quantile Regression Parameters +config.QUANTILES=21 + +#DRQN Parameters +config.SEQUENCE_LENGTH=8 + def plot(frame_idx, rewards, losses, elapsed_time): #clear_output(True) @@ -37,7 +82,6 @@ def plot(frame_idx, rewards, losses, elapsed_time): env = wrap_pytorch(env)''' env = gym.make('CartPole-v0') #env = wrappers.Monitor(env, 'Delete', force=True) - config = Config() model = Model(env=env, config=config) losses = [] diff --git a/utils/hyperparameters.py b/utils/hyperparameters.py index a055f3a..5fe8105 100644 --- a/utils/hyperparameters.py +++ b/utils/hyperparameters.py @@ 
-16,7 +16,7 @@ def __init__(self): #epsilon variables self.epsilon_start = 1.0 self.epsilon_final = 0.01 - self.epsilon_decay = 500 + self.epsilon_decay = 30000 self.epsilon_by_frame = lambda frame_idx: self.epsilon_final + (self.epsilon_start - self.epsilon_final) * math.exp(-1. * frame_idx / self.epsilon_decay) #misc agent variables @@ -24,30 +24,30 @@ def __init__(self): self.LR=1e-4 #memory - self.TARGET_NET_UPDATE_FREQ = 256 - self.EXP_REPLAY_SIZE = 10000 - self.BATCH_SIZE = 128 - self.PRIORITY_ALPHA=0.3 + self.TARGET_NET_UPDATE_FREQ = 1000 + self.EXP_REPLAY_SIZE = 100000 + self.BATCH_SIZE = 32 + self.PRIORITY_ALPHA=0.6 self.PRIORITY_BETA_START=0.4 - self.PRIORITY_BETA_FRAMES = 1000 + self.PRIORITY_BETA_FRAMES = 100000 #Noisy Nets - self.SIGMA_INIT=0.2 + self.SIGMA_INIT=0.5 #Learning control variables - self.LEARN_START = self.BATCH_SIZE*2 + self.LEARN_START = 10000 self.MAX_FRAMES=100000 #Categorical Params self.ATOMS = 51 - self.V_MAX = 50 - self.V_MIN = 0 + self.V_MAX = 10 + self.V_MIN = -10 #Quantile Regression Parameters - self.QUANTILES=11 + self.QUANTILES=51 #DRQN Parameters - self.SEQUENCE_LENGTH=10 + self.SEQUENCE_LENGTH=8 '''