Skip to content

Commit

Permalink
Adding cartpole with optimized hyperparameters
Browse files Browse the repository at this point in the history
  • Loading branch information
pizarrob committed Sep 25, 2023
1 parent c4d851a commit 3814bd7
Show file tree
Hide file tree
Showing 2,453 changed files with 108,659 additions and 488,973 deletions.
33 changes: 17 additions & 16 deletions experiments/mpsc/config_overrides/cartpole/ppo_cartpole.yaml
Original file line number Diff line number Diff line change
@@ -1,41 +1,42 @@
algo: ppo
algo_config:
# model args
hidden_dim: 64
hidden_dim: 32
activation: 'leaky_relu'
norm_obs: False
norm_reward: False
clip_obs: 10.0
clip_reward: 10.0

# loss args
gamma: 0.99
use_gae: True
gae_lambda: 0.95
gamma: 0.98
use_gae: False
gae_lambda: 0.8
use_clipped_value: False
clip_param: 0.2
target_kl: 0.01
entropy_coef: 0.01
clip_param: 0.1
target_kl: 1.587713889686473e-07
entropy_coef: 0.00010753631441212628

# optim args
opt_epochs: 10
mini_batch_size: 64
actor_lr: 0.0003
critic_lr: 0.001
opt_epochs: 5
mini_batch_size: 128
actor_lr: 0.0007948148615930024
critic_lr: 0.007497368468753617
max_grad_norm: 0.5

# runner args
max_env_steps: 200000
max_env_steps: 300000
num_workers: 1
rollout_batch_size: 1
rollout_steps: 100
rollout_steps: 150
deque_size: 10
eval_batch_size: 10

# misc
log_interval: 1000
save_interval: 10000
log_interval: 6000
save_interval: 0
num_checkpoints: 0
eval_interval: 1000
eval_interval: 6000
eval_save_best: True
tensorboard: False

Expand Down
25 changes: 16 additions & 9 deletions experiments/mpsc/config_overrides/cartpole/sac_cartpole.yaml
Original file line number Diff line number Diff line change
@@ -1,30 +1,37 @@
algo: sac
algo_config:
# model args
hidden_dim: 64
hidden_dim: 256
activation: 'relu'

# loss args
gamma: 0.98
tau: 0.12145208815621376
init_temperature: 0.2
use_entropy_tuning: False
target_entropy: null

# optim args
train_interval: 100
train_batch_size: 64
actor_lr: 0.001
critic_lr: 0.001
train_batch_size: 512
actor_lr: 0.00045196308120485273
critic_lr: 0.022547326782152065
entropy_lr: 0.001

# runner args
max_env_steps: 200000
warm_up_steps: 1000
max_env_steps: 150000
warm_up_steps: 100
rollout_batch_size: 1
num_workers: 1
max_buffer_size: 1000000
deque_size: 10
eval_batch_size: 10

# misc
log_interval: 1000
save_interval: 10000
log_interval: 3000
save_interval: 0
num_checkpoints: 0
eval_interval: 1000
eval_interval: 3000
eval_save_best: True
tensorboard: False

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
algo: safe_explorer_ppo
algo_config:
# model args
hidden_dim: 64
hidden_dim: 32
activation: 'leaky_relu'
norm_obs: False
norm_reward: False
clip_obs: 10.0
Expand All @@ -14,34 +15,34 @@ algo_config:
constraint_hidden_dim: 100

# loss args
gamma: 0.99
use_gae: True
gae_lambda: 0.95
gamma: 0.98
use_gae: False
gae_lambda: 0.8
use_clipped_value: False
clip_param: 0.2
target_kl: 0.01
entropy_coef: 0.01
clip_param: 0.1
target_kl: 1.587713889686473e-07
entropy_coef: 0.00010753631441212628

# optim args
opt_epochs: 10
mini_batch_size: 64
actor_lr: 0.0003
critic_lr: 0.001
opt_epochs: 5
mini_batch_size: 128
actor_lr: 0.0007948148615930024
critic_lr: 0.007497368468753617
max_grad_norm: 0.5

# runner args
max_env_steps: 200000
max_env_steps: 300000
num_workers: 1
rollout_batch_size: 1
rollout_steps: 100
rollout_batch_size: 4
rollout_steps: 150
deque_size: 10
eval_batch_size: 10

# misc
log_interval: 1000
save_interval: 10000
log_interval: 6000
save_interval: 0
num_checkpoints: 0
eval_interval: 1000
eval_interval: 6000
eval_save_best: True
tensorboard: False

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
algo: safe_explorer_ppo
algo_config:
# model args
hidden_dim: 64
hidden_dim: 32
activation: 'leaky_relu'
norm_obs: False
norm_reward: False
clip_obs: 10.0
Expand All @@ -21,34 +22,34 @@ algo_config:
constraint_slack: [0.01, 0.01, 0.01, 0.05, 0.01, 0.01, 0.01, 0.05]

# loss args
gamma: 0.99
gamma: 0.98
use_gae: False
gae_lambda: 0.95
gae_lambda: 0.8
use_clipped_value: False
clip_param: 0.2
target_kl: 0.01
entropy_coef: 0.01
clip_param: 0.1
target_kl: 1.587713889686473e-07
entropy_coef: 0.00010753631441212628

# optim args
opt_epochs: 10
mini_batch_size: 64
actor_lr: 0.0003
critic_lr: 0.001
opt_epochs: 5
mini_batch_size: 128
actor_lr: 0.0007948148615930024
critic_lr: 0.007497368468753617
max_grad_norm: 0.5

# runner args
max_env_steps: 100000
num_workers: 1
rollout_batch_size: 4
rollout_steps: 100
rollout_steps: 150
deque_size: 10
eval_batch_size: 10

# misc
log_interval: 10
save_interval: 10
log_interval: 100
save_interval: 0
num_checkpoints: 0
eval_interval: 10
eval_interval: 100
eval_save_best: True
tensorboard: False

Expand Down
Original file line number Diff line number Diff line change
@@ -1,43 +1,45 @@
algo: ppo
algo_config:
actor_lr: 0.0003
activation: leaky_relu
actor_lr: 0.0007948148615930024
clip_obs: 10.0
clip_param: 0.2
clip_param: 0.1
clip_reward: 10.0
critic_lr: 0.001
critic_lr: 0.007497368468753617
deque_size: 10
entropy_coef: 0.01
entropy_coef: 0.00010753631441212628
eval_batch_size: 10
eval_interval: 1000
eval_interval: 6000
eval_save_best: true
filter_train_actions: true
gae_lambda: 0.95
gamma: 0.99
hidden_dim: 64
log_interval: 1000
max_env_steps: 200000
gae_lambda: 0.8
gamma: 0.98
hidden_dim: 32
log_interval: 6000
max_env_steps: 300000
max_grad_norm: 0.5
mini_batch_size: 64
mini_batch_size: 128
norm_obs: false
norm_reward: false
num_checkpoints: 0
num_workers: 1
opt_epochs: 10
opt_epochs: 5
penalize_sf_diff: false
pretrained: ./models/rl_models/cartpole/stab/ppo_pretrain/
rollout_batch_size: 1
rollout_steps: 100
save_interval: 10000
rollout_steps: 150
save_interval: 0
sf_penalty: 300
target_kl: 0.01
target_kl: 1.587713889686473e-07
tensorboard: false
training: true
use_clipped_value: false
use_gae: true
use_gae: false
use_safe_reset: false
device: cpu
kv_overrides:
- task_config.init_state=None
- task_config.use_constraint_penalty=False
- sf_config.cost_function=one_step_cost
- sf_config.mpsc_cost_horizon=2
- sf_config.decay_factor=0.85
Expand Down
Loading

0 comments on commit 3814bd7

Please sign in to comment.