Skip to content

Commit

Permalink
Adding final comparison between MPC, PPO, PPO with MPSF, and PPO trained with MPSF
Browse files Browse the repository at this point in the history
  • Loading branch information
Federico-PizarroBejarano committed Nov 15, 2024
1 parent 1fac28d commit 7c43d8a
Show file tree
Hide file tree
Showing 15 changed files with 91 additions and 173 deletions.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
algo: mpc_acados
algo_config:
horizon: 25
q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001]
r_mpc: [3., 3.]
q_mpc: [10, 0.1, 10, 0.1, 0.1, 0.001]
r_mpc: [0.1, 0.1]
prior_info:
prior_prop:
beta_1: 18.11298
Expand All @@ -15,3 +15,6 @@ algo_config:
prior_prop_rand_info: null
warmstart: True
output_dir: ./mpc_acados/results

soft_constraints: True
soft_penalty: 1000.0
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ task_config:
ctrl_freq: 60
pyb_freq: 60
physics: dyn_si
# physics: pyb
quad_type: 4

init_state:
Expand All @@ -13,58 +12,34 @@ task_config:
init_z_dot: 0
init_theta: 0
init_theta_dot: 0
randomized_init: True
randomized_init: False
randomized_inertial_prop: False

init_state_randomization_info:
init_x:
distrib: 'uniform'
low: -0.05
high: 0.05
init_x_dot:
distrib: 'uniform'
low: -0.05
high: 0.05
init_z:
distrib: 'uniform'
low: -0.05
high: 0.05
init_z_dot:
distrib: 'uniform'
low: -0.05
high: 0.05
init_theta:
distrib: 'uniform'
low: -0.05
high: 0.05
init_theta_dot:
distrib: 'uniform'
low: -0.05
high: 0.05

task: traj_tracking
task_info:
trajectory_type: figure8
num_cycles: 2
trajectory_plane: 'xz'
trajectory_position_offset: [0, 1.]
trajectory_scale: 1.0
# ilqr_ref: True
# ilqr_traj_data: /home/mingxuan/Repositories/scg_tsung/examples/lqr/ilqr_ref_traj.npy

inertial_prop:
M: 0.033
Iyy: 1.4e-05
beta_1: 18.11
beta_2: 3.68
beta_3: 0.0
alpha_1: -140.8
alpha_2: -13.4
alpha_3: 124.8
pitch_bias: 0.0 # in radians

episode_len_sec: 11
cost: quadratic
cost: rl_reward
obs_goal_horizon: 0

# RL Reward
rew_state_weight: [10, 0.1, 10, 0.1, 0.1, 0.001]
rew_act_weight: [0.1, 0.1]
rew_exponential: True

# disturbances:
# observation:
# - disturbance_func: white_noise
# std: [5.6e-05, 1.5e-04, 2.9e-05, 8.0e-04, 1.3e-04, 3.6e-04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

constraints:
- constraint_form: default_constraint
Expand All @@ -73,17 +48,6 @@ task_config:
lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3]
- constraint_form: default_constraint
constrained_variable: input
upper_bounds: [0.58212, 0.7]
lower_bounds: [0.09702, -0.7]

done_on_out_of_bound: True
done_on_violation: False
disturbances:
# dynamics: # disturbance force in newton
# - disturbance_func: uniform
# low: 1.
# high: 1.
# mask: [1, 0, 0, 0]
observation:
- disturbance_func: white_noise
std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.5e-03]
16 changes: 15 additions & 1 deletion benchmarking_sim/quadrotor/mb_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,25 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True):
random_env = env_func(gui=False)

# Create controller.
config.task_config.constraints[0].upper_bounds = [0.899, 1.99, 1.449, 1.99, 0.749, 2.99]
config.task_config.constraints[0].lower_bounds = [-0.899, -1.99, 0.551, -1.99, -0.749, -2.99]
config.task_config.constraints[1].upper_bounds = [0.59, 0.436]
config.task_config.constraints[1].lower_bounds = [0.113, -0.436]

ctrl_env_func = partial(make,
config.task,
seed=config.seed,
**config.task_config
)
ctrl = make(config.algo,
env_func,
ctrl_env_func,
seed=config.seed,
**config.algo_config
)
config.task_config.constraints[0].upper_bounds = [0.9, 2, 1.45, 2, 0.75, 3]
config.task_config.constraints[0].lower_bounds = [-0.9, -2, 0.55, -2, -0.75, -3]
config.task_config.constraints[1].upper_bounds = [0.59336579, 0.43633232]
config.task_config.constraints[1].lower_bounds = [0.11264675, -0.43633232]

# Setup safety filter
if SAFETY_FILTER is not None:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
algo: ppo
algo_config:
# model args
hidden_dim: 128
activation: "tanh"
activation: tanh

# loss args
gamma: 0.98
Expand All @@ -18,15 +19,15 @@ algo_config:
critic_lr: 0.001

# runner args
max_env_steps: 396000
rollout_batch_size: 5
max_env_steps: 2640000
rollout_batch_size: 1
rollout_steps: 660
eval_batch_size: 10

# misc
log_interval: 39600
save_interval: 396000
log_interval: 66000
save_interval: 1320000
num_checkpoints: 0
eval_interval: 39600
eval_interval: 66000
eval_save_best: True
tensorboard: False
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
task_config:
seed: 1337
info_in_reset: True
ctrl_freq: 60
pyb_freq: 60
Expand All @@ -13,44 +12,16 @@ task_config:
init_z_dot: 0
init_theta: 0
init_theta_dot: 0
randomized_init: True
randomized_init: False
randomized_inertial_prop: False

init_state_randomization_info:
init_x:
distrib: 'uniform'
low: -0.05
high: 0.05
init_x_dot:
distrib: 'uniform'
low: -0.05
high: 0.05
init_z:
distrib: 'uniform'
low: -0.05
high: 0.05
init_z_dot:
distrib: 'uniform'
low: -0.05
high: 0.05
init_theta:
distrib: 'uniform'
low: -0.05
high: 0.05
init_theta_dot:
distrib: 'uniform'
low: -0.05
high: 0.05

task: traj_tracking
task_info:
trajectory_type: figure8
num_cycles: 2
trajectory_plane: 'xz'
trajectory_position_offset: [0, 1.]
trajectory_scale: 1.0
ilqr_ref: False
ilqr_traj_data: '../lqr/ilqr_ref_traj.npy'

inertial_prop:
M: 0.033
Expand All @@ -65,22 +36,18 @@ task_config:
rew_act_weight: [0.1, 0.1]
rew_exponential: True

disturbances:
observation:
- disturbance_func: white_noise
std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.6e-01, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
# disturbances:
# observation:
# - disturbance_func: white_noise
# std: [5.6e-05, 1.5e-04, 2.9e-05, 8.0e-04, 1.3e-04, 3.6e-04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

constraints:
- constraint_form: default_constraint
constrained_variable: state
# upper_bounds: [2, 1, 2, 1, 0.2, 2.5]
# lower_bounds: [-2, -1, 0, -1, -0.2, -2.5]
upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3]
lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3]
- constraint_form: default_constraint
constrained_variable: input
upper_bounds: [0.58212, 0.7]
lower_bounds: [0.09702, -0.7]
# upper_bounds: [0.47628, 0.4]
# lower_bounds: [0.079, -0.4]

done_on_out_of_bound: True
done_on_violation: False
1 change: 0 additions & 1 deletion examples/rl/rl_experiment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ do
./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \
--kv_overrides \
algo_config.training=False \
task_config.randomized_init=True \
task_config.task_info.num_cycles=2 \
task_config.task_info.ilqr_ref=False \
task_config.task_info.ilqr_traj_data='../lqr/ilqr_ref_traj.npy' \
Expand Down
6 changes: 3 additions & 3 deletions examples/rl/train_rl_model.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,9 @@ do
./config_overrides/${SYS}/${SYS}_${TASK}.yaml \
--output_dir ./results/${EXP_NAME}/${SYS}_${ALGO}_data/${SEED}/ \
--seed ${SEED} \
--use_gpu \
--kv_overrides \
task_config.randomized_init=True
--use_gpu #\
# --kv_overrides \
# task_config.randomized_init=True
done

# Move the newly trained unsafe model.
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ sf_config:

# Softening
soften_constraints: True
slack_cost: 250.0
slack_cost: 1000.0
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@ algo_config:
critic_lr: 0.001

# runner args
max_env_steps: 396000
max_env_steps: 2640000
rollout_batch_size: 1
rollout_steps: 660
eval_batch_size: 10

# misc
log_interval: 39600
save_interval: 396000
log_interval: 66000
save_interval: 1320000
num_checkpoints: 0
eval_interval: 39600
eval_interval: 66000
eval_save_best: True
tensorboard: False

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,35 +12,9 @@ task_config:
init_z_dot: 0
init_theta: 0
init_theta_dot: 0
randomized_init: True
randomized_init: False
randomized_inertial_prop: False

init_state_randomization_info:
init_x:
distrib: 'uniform'
low: -0.05
high: 0.05
init_x_dot:
distrib: 'uniform'
low: -0.05
high: 0.05
init_z:
distrib: 'uniform'
low: -0.05
high: 0.05
init_z_dot:
distrib: 'uniform'
low: -0.05
high: 0.05
init_theta:
distrib: 'uniform'
low: -0.05
high: 0.05
init_theta_dot:
distrib: 'uniform'
low: -0.05
high: 0.05

task: traj_tracking
task_info:
trajectory_type: figure8
Expand Down Expand Up @@ -74,8 +48,6 @@ task_config:
lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3]
- constraint_form: default_constraint
constrained_variable: input
upper_bounds: [0.58212, 0.4]
lower_bounds: [0.09702, -0.4]

done_on_out_of_bound: True
done_on_violation: False
10 changes: 9 additions & 1 deletion experiments/mpsc/mpsc_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,14 @@ def run(plot=False, training=False, model='ppo'):
**config.task_config)
env = env_func()

config.task_config.constraints[0].upper_bounds = [0.899, 1.99, 1.449, 1.99, 0.749, 2.99]
config.task_config.constraints[0].lower_bounds = [-0.899, -1.99, 0.551, -1.99, -0.749, -2.99]
config.task_config.constraints[1].upper_bounds = [0.59, 0.436]
config.task_config.constraints[1].lower_bounds = [0.113, -0.436]
env_func = partial(make,
config.task,
**config.task_config)

# Setup controller.
ctrl = make(config.algo,
env_func,
Expand Down Expand Up @@ -162,4 +170,4 @@ def run_multiple_models(plot, all_models):

if __name__ == '__main__':
# run(plot=True, training=False, model='none')
run_multiple_models(plot=True, all_models=['none2', 'mpsf2'])
run_multiple_models(plot=True, all_models=['mpsf8'])
2 changes: 1 addition & 1 deletion experiments/mpsc/mpsc_experiment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ SAFETY_FILTER='mpsc_acados'
MPSC_COST='precomputed_cost'
# MPSC_COST='one_step_cost'
MPSC_COST_HORIZON=25
DECAY_FACTOR=0.9
DECAY_FACTOR=1

python3 ./mpsc_experiment.py \
--task quadrotor \
Expand Down
Loading

0 comments on commit 7c43d8a

Please sign in to comment.