diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/dppo_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/dppo_quadrotor_2D_attitude.yaml index 8bb6ed29a..a63a2514c 100644 --- a/examples/rl/config_overrides/quadrotor_2D_attitude/dppo_quadrotor_2D_attitude.yaml +++ b/examples/rl/config_overrides/quadrotor_2D_attitude/dppo_quadrotor_2D_attitude.yaml @@ -1,57 +1,58 @@ -algo_config: - # model args - hidden_dim: 128 - activation: "tanh" + algo_config: + # model args + hidden_dim: 128 + activation: "tanh" - # loss args - gamma: 0.98 - use_gae: True - gae_lambda: 0.92 - clip_param: 0.2 - target_kl: 1.0e-2 - entropy_coef: 0.01 - quantile_count: 256 - value_loss: 'quantile_l1' + # loss args + gamma: 0.98 + use_gae: True + gae_lambda: 0.92 + clip_param: 0.2 + target_kl: 1.0e-2 + entropy_coef: 0.005 + quantile_count: 256 + value_loss: 'quantile_l1' - # optim args - opt_epochs: 20 - mini_batch_size: 256 - actor_lr: 0.001 - critic_lr: 0.001 + # optim args + opt_epochs: 20 + mini_batch_size: 256 + actor_lr: 0.001 + critic_lr: 0.001 - # runner args - max_env_steps: 396000 - rollout_batch_size: 5 - rollout_steps: 660 - eval_batch_size: 10 + # runner args + max_env_steps: 660000 + rollout_batch_size: 5 + rollout_steps: 660 + eval_batch_size: 10 - # misc - log_interval: 6600 - save_interval: 660000 - num_checkpoints: 0 - eval_interval: 6600 - eval_save_best: True - tensorboard: False + # misc + log_interval: 13200 + save_interval: 660000 + num_checkpoints: 0 + eval_interval: 13200 + eval_save_best: True + tensorboard: False #algo_config: # # model args -# hidden_dim: 128 -# activation: "relu" +# hidden_dim: 8 +# activation: "tanh" # # # loss args # gamma: 0.98 -# use_gae: True # or False -# gae_lambda: 0.92 -# use_clipped_value: False # or True -# clip_param: 0.1 -# target_kl: 1.0e-5 -# entropy_coef: 0.005 +# use_gae: True +# gae_lambda: 0.9 +# clip_param: 0.2 +# target_kl: 2.32e-2 +# entropy_coef: 0.09 +# quantile_count: 256 +# value_loss: 'quantile_l1' # # # optim args # opt_epochs: 20 # mini_batch_size: 256 -# actor_lr: 0.0005 -# critic_lr: 0.0005 +# actor_lr: 0.0012 +# critic_lr: 0.0012 # # # runner args # max_env_steps: 660000 diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml index f9ad6578c..18c5144a2 100644 --- a/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml +++ b/examples/rl/config_overrides/quadrotor_2D_attitude/ppo_quadrotor_2D_attitude.yaml @@ -1,66 +1,65 @@ -algo_config: - # model args - hidden_dim: 128 - activation: "tanh" + algo_config: + # model args + hidden_dim: 128 + activation: "tanh" - # loss args - gamma: 0.98 - use_gae: True - gae_lambda: 0.92 - clip_param: 0.2 - target_kl: 1.0e-2 - entropy_coef: 0.01 + # loss args + gamma: 0.98 + use_gae: True + gae_lambda: 0.92 + clip_param: 0.2 + target_kl: 1.0e-2 + entropy_coef: 0.005 - # optim args - opt_epochs: 20 - mini_batch_size: 256 - actor_lr: 0.001 - critic_lr: 0.001 + # optim args + opt_epochs: 20 + mini_batch_size: 256 + actor_lr: 0.001 + critic_lr: 0.001 - # runner args - max_env_steps: 396000 - rollout_batch_size: 5 - rollout_steps: 660 - eval_batch_size: 10 + # runner args + max_env_steps: 660000 + rollout_batch_size: 5 + rollout_steps: 660 + eval_batch_size: 10 - # misc - log_interval: 6600 - save_interval: 660000 - num_checkpoints: 0 - eval_interval: 6600 - eval_save_best: True - tensorboard: False + # misc + log_interval: 13200 + save_interval: 660000 + num_checkpoints: 0 + eval_interval: 13200 + eval_save_best: True + tensorboard: False #algo_config: # # model args -# hidden_dim: 128 -# activation: "relu" +# hidden_dim: 8 +# activation: "tanh" # # # loss args # gamma: 0.98 -# use_gae: True # or False -# gae_lambda: 0.92 -# use_clipped_value: False # or True -# clip_param: 0.1 -# target_kl: 1.0e-5 -# entropy_coef: 0.003 +# use_gae: True +# gae_lambda: 0.9 +# clip_param: 0.2 +# target_kl: 2.32e-2 +# entropy_coef: 0.09 # # # optim args -# opt_epochs: 25 +# opt_epochs: 20 # mini_batch_size: 256 -# actor_lr: 7.2e-5 -# critic_lr: 0.0266 +# actor_lr: 0.0012 +# critic_lr: 0.0012 # # # runner args -# max_env_steps: 216000 +# max_env_steps: 660000 # rollout_batch_size: 5 # rollout_steps: 660 # eval_batch_size: 10 # # # misc -# log_interval: 6600 +# log_interval: 13200 # save_interval: 660000 # num_checkpoints: 0 -# eval_interval: 6600 +# eval_interval: 13200 # eval_save_best: True # tensorboard: False diff --git a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml index 263508a32..a4b350a9d 100644 --- a/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml +++ b/examples/rl/config_overrides/quadrotor_2D_attitude/quadrotor_2D_attitude_track.yaml @@ -61,8 +61,10 @@ task_config: obs_goal_horizon: 1 # RL Reward - rew_state_weight: [10., .0, 10., .0, .0, 0.0] - rew_act_weight: [.0, .0] + rew_state_weight: [10., .1, 10., .1, .1, 0.001] + rew_act_weight: [.1, .1] +# rew_state_weight: [ 8.45, .48, 9.18, .81, .49, 0.001] +# rew_act_weight: [ .855, 1.194] rew_exponential: True disturbances: