# cfg_rogue_f5f_10th.cfg
# Configuration for descending to the tenth level
# using the three-towers model and a complete map state representation
# that hides dark tiles in dark rooms, just as the game screen does
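#
# This file uses standard INI syntax, so it can be read with Python's
# built-in configparser. A minimal sketch of loading it (assumed usage;
# the project may ship its own loader):
#
#   import configparser
#
#   cfg = configparser.ConfigParser()
#   cfg.read('cfg_rogue_f5f_10th.cfg')
#   max_episode_len = cfg.getint('Rogue', 'max_episode_len')    # 1200
#   use_monsters = cfg.getboolean('Rogue', 'use_monsters')      # False
#   policy = cfg.get('Training', 'policy')                      # 'Towers_LSTM'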
[Rogue]
# Maximum number of steps in an episode
max_episode_len = 1200
# Number of latest episodes to use for stats
episodes_for_evaluation = 200
# State generator name
state_generator = FullMap_5L_forget_StateGenerator
# Reward generator name
reward_generator = StairsOnly_NthLevel_RewardGenerator
# Actions available (vi-style movement keys h/j/k/l plus > to descend stairs)
actions = hjkl>
# Whether to enable monsters
use_monsters = False
# Whether to enable hidden tiles
enable_secrets = False
# Level where the Amulet of Yendor will be
amulet_level = 26
# Number of steps after which the rogue becomes faint
hungertime = 1300
# Maximum number of traps
max_traps = 0
# Whether to turn the descent action into ascent when the amulet level is reached
transform_descent_action = True
# Whether to send the refresh command after each action
refresh_after_commands = False
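#
# Note: hungertime (1300) exceeds max_episode_len (1200), so with these
# settings the rogue should never grow faint within a single episode.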
[RNG]
# RNG seed
seed = 0
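#
# A sketch of how a fixed seed like this is typically applied (assumed
# usage; the project may also seed the environment's own RNG):
#
#   import random
#   import numpy as np
#
#   seed = cfg.getint('RNG', 'seed')    # 0
#   random.seed(seed)
#   np.random.seed(seed)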
[Training]
# Whether to use the GPU
use_gpu = True
# Policy model to use
policy = Towers_LSTM
# Number of environments to run in parallel
num_env = 16
# Number of steps per n-step return
nsteps = 60
# Total number of training steps
total_timesteps = 1e8
# Replay buffer size
buffer_size = 50000
# Average number of replay (off-policy) updates per on-policy update
replay_ratio = 4
# Number of training steps before memory replay begins
replay_start = 10000
# Q loss coefficient
q_coef = 0.5
# Entropy loss coefficient
ent_coef = 0.01
# Gradient clipping threshold (maximum norm)
max_grad_norm = 10
# Initial learning rate
lr = 7e-4
# Learning rate decay type
lrschedule = linear
# RMSprop epsilon
rprop_epsilon = 1e-5
# RMSprop alpha
rprop_alpha = 0.99
# Discount factor
gamma = 0.99
# Importance sampling truncation threshold (c in ACER)
c = 10.0
# Whether to use the trust region update from ACER
trust_region = True
# Average policy soft update coefficient
alpha = 0.99
# Delta as defined in ACER for the trust region update
delta = 1
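#
# total_timesteps is written in scientific notation ('1e8'), which
# configparser's getint() cannot parse; reading it through float first
# works (assumed usage):
#
#   total_timesteps = int(cfg.getfloat('Training', 'total_timesteps'))  # 100000000
#   lr = cfg.getfloat('Training', 'lr')                                 # 0.0007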
[Log]
# Logging directory
log_dir = save_f5f_10th
# Logging interval in number of batches
log_interval = 100
# Custom stats interval in number of batches
stats_interval = 1
# Save directory
save_dir = save_f5f_10th
# Saving interval in number of batches
save_interval = 100
# Permanently keep a checkpoint every n hours
permanent_save_hours = 12
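#
# For scale: one on-policy batch is num_env * nsteps = 16 * 60 = 960
# environment steps, so log_interval = 100 and save_interval = 100 each
# correspond to roughly every 96,000 steps (derived figures, assuming the
# intervals count on-policy batches).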