Skip to content

Commit

Permalink
fixed default hyperparameters
Browse files Browse the repository at this point in the history
  • Loading branch information
qfettes committed Jun 19, 2018
1 parent 296c7ad commit fe3d2d2
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 13 deletions.
46 changes: 45 additions & 1 deletion devel.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,51 @@
from utils.hyperparameters import Config
from agents.Rainbow import Model

# Script-local overrides applied on top of the defaults set by Config().
config = Config()

# --- Algorithm control flags ---
config.USE_NOISY_NETS = False
config.USE_PRIORITY_REPLAY = False

# --- Multi-step returns ---
config.N_STEPS = 1

# --- Epsilon schedule ---
# epsilon_by_frame(frame_idx) yields epsilon_start at frame 0 and decays
# exponentially toward epsilon_final, with epsilon_decay as the scale (in
# frames). The attributes are looked up on `config` when the schedule is
# called, so changing them later also changes the schedule.
config.epsilon_start = 1.0
config.epsilon_final = 0.01
config.epsilon_decay = 500
config.epsilon_by_frame = lambda frame_idx: (
    config.epsilon_final
    + (config.epsilon_start - config.epsilon_final)
    * math.exp(-1. * frame_idx / config.epsilon_decay)
)

# --- Misc agent variables ---
config.GAMMA = 0.99
config.LR = 1e-4

# --- Memory / replay settings ---
config.TARGET_NET_UPDATE_FREQ = 128
config.EXP_REPLAY_SIZE = 10000
config.BATCH_SIZE = 32
config.PRIORITY_ALPHA = 0.6
config.PRIORITY_BETA_START = 0.4
config.PRIORITY_BETA_FRAMES = 100000

# --- Noisy Nets ---
config.SIGMA_INIT = 0.5

# --- Learning control variables ---
# LEARN_START is derived from BATCH_SIZE, so it must be assigned after it.
config.LEARN_START = 2 * config.BATCH_SIZE
config.MAX_FRAMES = 100000

# --- Categorical parameters ---
config.ATOMS = 51
config.V_MAX = 50
config.V_MIN = 0

# --- Quantile regression parameters ---
config.QUANTILES = 21

# --- DRQN parameters ---
config.SEQUENCE_LENGTH = 8


def plot(frame_idx, rewards, losses, elapsed_time):
#clear_output(True)
Expand All @@ -37,7 +82,6 @@ def plot(frame_idx, rewards, losses, elapsed_time):
env = wrap_pytorch(env)'''
env = gym.make('CartPole-v0')
#env = wrappers.Monitor(env, 'Delete', force=True)
config = Config()
model = Model(env=env, config=config)

losses = []
Expand Down
24 changes: 12 additions & 12 deletions utils/hyperparameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,38 +16,38 @@ def __init__(self):
#epsilon variables
self.epsilon_start = 1.0
self.epsilon_final = 0.01
self.epsilon_decay = 500
self.epsilon_decay = 30000
self.epsilon_by_frame = lambda frame_idx: self.epsilon_final + (self.epsilon_start - self.epsilon_final) * math.exp(-1. * frame_idx / self.epsilon_decay)

#misc agent variables
self.GAMMA=0.99
self.LR=1e-4

#memory
self.TARGET_NET_UPDATE_FREQ = 256
self.EXP_REPLAY_SIZE = 10000
self.BATCH_SIZE = 128
self.PRIORITY_ALPHA=0.3
self.TARGET_NET_UPDATE_FREQ = 1000
self.EXP_REPLAY_SIZE = 100000
self.BATCH_SIZE = 32
self.PRIORITY_ALPHA=0.6
self.PRIORITY_BETA_START=0.4
self.PRIORITY_BETA_FRAMES = 1000
self.PRIORITY_BETA_FRAMES = 100000

#Noisy Nets
self.SIGMA_INIT=0.2
self.SIGMA_INIT=0.5

#Learning control variables
self.LEARN_START = self.BATCH_SIZE*2
self.LEARN_START = 10000
self.MAX_FRAMES=100000

#Categorical Params
self.ATOMS = 51
self.V_MAX = 50
self.V_MIN = 0
self.V_MAX = 10
self.V_MIN = -10

#Quantile Regression Parameters
self.QUANTILES=11
self.QUANTILES=51

#DRQN Parameters
self.SEQUENCE_LENGTH=10
self.SEQUENCE_LENGTH=8


'''
Expand Down

0 comments on commit fe3d2d2

Please sign in to comment.