# train_ppo_config.yaml
# Use the new RLlib API with RLModule.
use_rl_module: true
# Model name, used when RLModule is not used.
# Options: CIL_multiview_rllib, CIL_multiview_rllib_stack
model: CIL_multiview_rllib
# Distribution to use for the policy. Options: gaussian, beta
output_distribution: gaussian
########################################################################################
# Choose at most one of the options below. If more than one is selected, the
# order listed here is followed.
# Do not share weights between the policy and value networks.
use_separate_vf: false
# Use the pretrained model as a feature extractor.
use_stacked_model: false
########################################################################################
########################################################################################
# Choose at most one of the options below. If more than one is selected, the
# order listed here is followed. All of them work only with RLModule.
# Add the KL loss between the pretrained policy and the current policy.
use_pretrained_kl_loss: true
# Add the scaled variant of the KL loss between the pretrained policy and the
# current policy.
use_pretrained_scaled_kl_loss: false
# Use two clip losses: the normal one and one computed with the pretrained model.
use_double_clip_loss: false
# Add the L1 loss between the pretrained policy and the current policy.
use_pretrained_l1_loss: false
########################################################################################
# Use CILv2_vec_env (vector environment).
use_vec_env: false
# Use CILv2_sub_env (runs the environment in a subprocess).
use_sub_env: true
# Use CILv2_multiagent_env (runs many vehicles in a single server; only works
# in free ride mode).
use_multiagent_env: false
use_multiagent_sub_env: false  # runs CILv2_multiagent_env in a subprocess
num_agents_per_server: 8  # number of agents in each environment
no_rollout_workers: true  # if use_vec_env is true, set num_rollout_workers=0
# Path to the checkpoint from which training starts.
checkpoint_file: models/CILv2_multiview/_results/checkpoints/CIL_multiview_actor_critic/CIL_multiview_actor_critic_final.pth
# Path to the model's config file.
path_to_conf: ./models/CILv2_multiview/_results/Ours/Town12346_5/config40.json
# Value function (critic) pretraining.
pretrain_value: true  # enables pretraining
pretrain_iters: 2
# Use batch_mode: complete_episodes for the pretraining.
pretrain_complete_episodes: true
# Learning rate for pretraining (set to null or remove the key to use the same
# learning rate as training).
lr_pretrain: 5.0e-05
# Normalize the target value function (with an exponentially weighted moving
# average).
norm_target_value: true
train_iters: 1000
# Sampling settings.
num_workers: 8  # number of parallel environments
num_cpus_per_worker: 1
worker_gpus: 0  # number of GPUs for sampling (collecting the observations)
training_gpus: 4  # number of GPUs for training (updating the weights)
# Training settings.
# NOTE(review): training_gpus is 4 while num_gpus_per_learner_worker is 0 —
# confirm which of the two the training script actually applies.
num_learner_workers: 0
num_cpus_per_learner_worker: 1
num_gpus_per_learner_worker: 0
checkpoint_freq: 20  # frequency for the checkpoints
# Extra parameters for the PPOConfig.
# Every key below must be nested (indented) under extra_params; at column 0
# extra_params itself would parse as null and these settings would become
# unrelated top-level keys.
# Trailing commented values (e.g. "# 180.0") are the alternative/previous
# values noted by the author — presumably the library defaults; confirm against
# the installed RLlib version.
extra_params:
  disable_env_checking: true
  explore: false  # default true
  lr: 5.0e-06
  train_batch_size: 2000
  sgd_minibatch_size: 128
  num_sgd_iter: 30
  gamma: 0.99
  clip_param: 0.2
  vf_clip_param: 10.0
  vf_loss_coeff: 1.0
  use_gae: true
  lambda_: 0.95
  use_kl_loss: true
  kl_coeff: 0.2
  kl_target: 0.01
  shuffle_sequences: true
  entropy_coeff: 0.01
  entropy_coeff_schedule: null
  # _disable_preprocessor_api: true  # in older version uses numpy array if used???
  recreate_failed_workers: false  # default false
  restart_failed_sub_environments: false  # default false
  torch_compile_learner: true  # default false
  torch_compile_learner_dynamo_backend: ipex  # inductor
  torch_compile_learner_dynamo_mode: max-autotune
  torch_compile_worker: true  # default false
  torch_compile_worker_dynamo_backend: inductor
  worker_restore_timeout_s: 1800.0  # 180.0
  worker_health_probe_timeout_s: 600  # 60
  sync_filters_on_rollout_workers_timeout_s: 600.0  # 60.0
  metrics_episode_collection_timeout_s: 600.0  # 60.0
  evaluation_sample_timeout_s: 1800.0  # 180.0