Commit d646948: Merge branch 'benchmark_mpsf' into benchmark_test

Federico-PizarroBejarano committed Nov 1, 2024
2 parents: cd02203 + 68fd078
Showing 39 changed files with 3,001 additions and 339 deletions.
14 changes: 5 additions & 9 deletions .gitignore
@@ -6,11 +6,13 @@ examples/mpsc/unsafe_rl_temp_data/
#
examples/pid/*data/
#
Results/
experiments/mpsc/results*
experiments/mpsc/models/rl_models*
#
results/
z_docstring.py
TODOs.md
#
#
hpo_study*/
hp_study*/
comparisons/
@@ -155,10 +157,4 @@ dmypy.json
.idea/

*c_generated_code/
acados_ocp_nlp.json
gpmpc_acados_ocp_solver.json
gpmpc_update/
temp
models/
benchmarking_sim/quadrotor/acados_ocp.json
acados_ocp.json
*acados_ocp*.json
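The hunk above consolidates the individually listed acados artifacts into the single glob *acados_ocp*.json. A quick sanity check that the new pattern covers the entries it replaces (a sketch; fnmatch is only an approximation of gitignore semantics, which treat '/' specially):

# Sketch: confirm '*acados_ocp*.json' matches the removed entries.
from fnmatch import fnmatch

old_entries = [
    'acados_ocp_nlp.json',
    'gpmpc_acados_ocp_solver.json',
    'benchmarking_sim/quadrotor/acados_ocp.json',
    'acados_ocp.json',
]
assert all(fnmatch(entry, '*acados_ocp*.json') for entry in old_entries)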
@@ -1,9 +1,9 @@
safety_filter: linear_mpsc
sf_config:
# LQR controller parameters
r_lin:
r_mpc:
- 0.1
q_lin:
q_mpc:
- 1
- 0.1
- 1
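This hunk, repeated below for several controller and safety-filter override files, renames the LQR-style weight keys r_lin/q_lin to the MPC-style r_mpc/q_mpc. A minimal sketch of migrating an old override, assuming plain dict configs (migrate_sf_config is a hypothetical helper, not part of safe-control-gym):

# Hypothetical migration helper for the r_lin/q_lin -> r_mpc/q_mpc rename.
RENAMED_KEYS = {'r_lin': 'r_mpc', 'q_lin': 'q_mpc'}

def migrate_sf_config(cfg):
    return {RENAMED_KEYS.get(key, key): value for key, value in cfg.items()}

old_cfg = {'r_lin': [0.1], 'q_lin': [1, 0.1, 1]}
assert migrate_sf_config(old_cfg) == {'r_mpc': [0.1], 'q_mpc': [1, 0.1, 1]}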
@@ -1,35 +1,17 @@
algo: mpc_acados
algo_config:
# horizon: 40
# r_mpc:
# - 0.8
# - 0.8
# q_mpc:
# - 5.0
# - 0.1
# - 5.0
# - 0.1
# - 0.5
# - 0.001
horizon: 25
q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001]
r_mpc: [3., 3.]
# Prior info
prior_info:
# prior_prop: null
prior_prop:
prior_prop:
beta_1: 18.11298
beta_2: 3.6800
beta_3: 0
alpha_1: -140.8
alpha_2: -13.4
alpha_3: 124.8
# pitch_bias: 0.0 # in radian
randomize_prior_prop: False
prior_prop_rand_info: null
warmstart: True
# soft_constraints: True
# use_RTI: True
output_dir: ./mpc_acados/results
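The uncommented values above fix the horizon at 25 steps and set diagonal cost weights. As a worked illustration, these lists define the standard quadratic MPC stage cost (a sketch under that assumed convention; the actual cost assembly lives inside the mpc_acados implementation):

# Sketch: quadratic stage cost implied by q_mpc/r_mpc above
# (x: 6-dim state error, u: 2-dim input error; assumed convention).
import numpy as np

Q = np.diag([18, 0.1, 18, 0.5, 0.5, 0.0001])
R = np.diag([3.0, 3.0])

def stage_cost(x, u):
    return x @ Q @ x + u @ R @ u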


@@ -69,6 +69,8 @@ task_config:
constraints:
- constraint_form: default_constraint
constrained_variable: state
upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3]
lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3]
- constraint_form: default_constraint
constrained_variable: input
upper_bounds: [0.58212, 0.7]
@@ -84,4 +86,4 @@ task_config:
# mask: [1, 0, 0, 0]
observation:
- disturbance_func: white_noise
std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.5e-03]
std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.5e-03]
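The new state bounds and the observation noise levels can be read together: the noise stds sit orders of magnitude below the constraint ranges. A small sketch of what the white-noise disturbance amounts to (illustration only; the environment applies this internally, and the hover state here is a hypothetical example):

# Sketch: white-noise observation disturbance with the stds above,
# checked against the new state bounds (illustration only).
import numpy as np

std = np.array([5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.5e-03])
lower = np.array([-0.9, -2, 0.55, -2, -0.75, -3])
upper = np.array([0.9, 2, 1.45, 2, 0.75, 3])

rng = np.random.default_rng(9)
state = np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0])   # hypothetical hover state
noisy_obs = state + rng.normal(0.0, std)
assert np.all((noisy_obs >= lower) & (noisy_obs <= upper))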
69 changes: 34 additions & 35 deletions benchmarking_sim/quadrotor/mb_experiment.py
@@ -1,7 +1,7 @@

import os
import sys
import pickle
import sys
from collections import defaultdict
from functools import partial

@@ -10,16 +10,16 @@
from matplotlib.ticker import FormatStrFormatter

from safe_control_gym.envs.benchmark_env import Task
from safe_control_gym.envs.gym_pybullet_drones.quadrotor import Quadrotor
from safe_control_gym.experiments.base_experiment import BaseExperiment
from safe_control_gym.experiments.epoch_experiments import EpochExperiment
from safe_control_gym.utils.configuration import ConfigFactory
from safe_control_gym.utils.gpmpc_plotting import make_quad_plots
from safe_control_gym.utils.registration import make
from safe_control_gym.utils.utils import mkdirs, set_dir_from_config, timing
from safe_control_gym.envs.gym_pybullet_drones.quadrotor import Quadrotor
from safe_control_gym.utils.gpmpc_plotting import make_quad_plots

script_path = os.path.dirname(os.path.realpath(__file__))


@timing
def run(gui=False, n_episodes=1, n_steps=None, save_data=True):
'''The main function running experiments for model-based methods.
@@ -36,9 +36,9 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True):
else:
# ALGO = 'ilqr'
# ALGO = 'gp_mpc'
ALGO = 'gpmpc_acados'
# ALGO = 'gpmpc_acados'
# ALGO = 'mpc'
# ALGO = 'mpc_acados'
ALGO = 'mpc_acados'
# ALGO = 'linear_mpc'
# ALGO = 'lqr'
# ALGO = 'lqr_c'
@@ -59,28 +59,28 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True):
sys.argv[1:] = ['--algo', ALGO,
'--task', agent,
'--overrides',
f'./config_overrides/{SYS}_{TASK}.yaml',
f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml',
f'./config_overrides/{SYS}_{TASK}.yaml',
f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml',
'--seed', '9',
'--use_gpu', 'True',
'--output_dir', f'./{ALGO}/results',
]
]
else:
MPSC_COST='one_step_cost'
MPSC_COST = 'one_step_cost'
assert ALGO != 'gp_mpc', 'Safety filter not supported for gp_mpc'
assert os.path.exists(f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml'), f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml does not exist'
sys.argv[1:] = ['--algo', ALGO,
'--task', agent,
'--safety_filter', SAFETY_FILTER,
'--overrides',
f'./config_overrides/{SYS}_{TASK}.yaml',
f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml',
f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml',
f'./config_overrides/{SYS}_{TASK}.yaml',
f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml',
f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml',
'--kv_overrides', f'sf_config.cost_function={MPSC_COST}',
'--seed', '2',
'--use_gpu', 'True',
'--output_dir', f'./{ALGO}/results',
]
]
fac = ConfigFactory()
fac.add_argument('--func', type=str, default='train', help='main function to run.')
fac.add_argument('--n_episodes', type=int, default=1, help='number of episodes to run.')
@@ -89,7 +89,7 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True):
if ALGO in ['gpmpc_acados', 'gp_mpc']:
num_data_max = config.algo_config.num_epochs * config.algo_config.num_samples
config.output_dir = os.path.join(config.output_dir, PRIOR + '_' + repr(num_data_max))
print('output_dir', config.algo_config.output_dir)
print('output_dir', config.algo_config.output_dir)
set_dir_from_config(config)
config.algo_config.output_dir = config.output_dir
mkdirs(config.output_dir)
@@ -108,17 +108,17 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True):
seed=config.seed,
**config.algo_config
)

# Setup safety filter
if SAFETY_FILTER is not None:
env_func_filter = partial(make,
config.task,
seed=config.seed,
**config.task_config)
config.task,
seed=config.seed,
**config.task_config)
safety_filter = make(config.safety_filter,
env_func_filter,
seed=config.seed,
**config.sf_config)
env_func_filter,
seed=config.seed,
**config.sf_config)
safety_filter.reset()

all_trajs = defaultdict(list)
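The safety-filter block above freezes the task name, seed, and task config into env_func_filter with functools.partial, so the filter can construct environment instances on demand. A minimal stand-alone illustration of that pattern (make_env is a stand-in for safe_control_gym's registry make, not the real function):

# Minimal sketch of the functools.partial factory pattern used above.
from functools import partial

def make_env(task, seed=None, gui=False, **config):   # stand-in for `make`
    return {'task': task, 'seed': seed, 'gui': gui, **config}

env_func = partial(make_env, 'quadrotor', seed=2)     # frozen now
env = env_func(gui=False)                             # remaining args supplied later
assert env['task'] == 'quadrotor' and env['seed'] == 2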
@@ -133,19 +133,19 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True):
static_train_env = env_func(gui=False, randomized_init=False, init_state=init_state)

# Create experiment, train, and run evaluation
if SAFETY_FILTER is None:
if ALGO in ['gpmpc_acados', 'gp_mpc'] :
if SAFETY_FILTER is None:
if ALGO in ['gpmpc_acados', 'gp_mpc']:
experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env)
if config.algo_config.num_epochs == 1:
print('Evaluating prior controller')
elif config.algo_config.gp_model_path is not None:
ctrl.load(config.algo_config.gp_model_path)
else:
# manually launch training
# manually launch training
# (NOTE: not using launch_training method since calling plotting before eval will break the eval)
experiment.reset()
train_runs, test_runs = ctrl.learn(env=static_train_env)
else:
else:
experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env)
experiment.launch_training()
else:
@@ -165,14 +165,13 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True):
if ALGO in ['gpmpc_acados', 'gp_mpc'] and \
config.algo_config.gp_model_path is None and \
config.algo_config.num_epochs > 1:
if isinstance(static_env, Quadrotor):
make_quad_plots(test_runs=test_runs,
train_runs=train_runs,
trajectory=ctrl.traj.T,
dir=ctrl.output_dir)
if isinstance(static_env, Quadrotor):
make_quad_plots(test_runs=test_runs,
train_runs=train_runs,
trajectory=ctrl.traj.T,
dir=ctrl.output_dir)
plot_quad_eval(trajs_data['obs'][0], trajs_data['action'][0], ctrl.env, config.output_dir)


# Close environments
static_env.close()
static_train_env.close()
@@ -249,15 +248,15 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None):

# plot the figure-eight
_, axs = plt.subplots(1)
axs.plot(np.array(state_stack).transpose()[0, 0:plot_length],
axs.plot(np.array(state_stack).transpose()[0, 0:plot_length],
np.array(state_stack).transpose()[2, 0:plot_length], label='actual')
axs.plot(reference.transpose()[0, 0:plot_length],
axs.plot(reference.transpose()[0, 0:plot_length],
reference.transpose()[2, 0:plot_length], color='r', label='desired')
axs.set_xlabel('x [m]')
axs.set_ylabel('z [m]')
axs.set_title('State path in x-z plane')
axs.legend()

if save_path is not None:
plt.savefig(os.path.join(save_path, 'state_path.png'))
print(f'Plots saved to {save_path}')
@@ -1,9 +1,9 @@
safety_filter: linear_mpsc
sf_config:
# LQR controller parameters
r_lin:
r_mpc:
- 0.1
q_lin:
q_mpc:
- 1
- 0.1
- 1
2 changes: 1 addition & 1 deletion examples/hpo/train_rl_model.sh
@@ -32,7 +32,7 @@ python ./hpo_experiment.py \
--overrides \
./rl/${ALGO}/config_overrides/${SYS}/${ALGO}_${SYS}.yaml \
./rl/config_overrides/${SYS}/${SYS}_${TASK}.yaml \
--output_dir ./Results/${SYS}_${ALGO}_data/ \
--output_dir ./results/${SYS}_${ALGO}_data/ \
--tag Test \
--opt_hps ./rl/${ALGO}/config_overrides/${SYS}/optimized_hyperparameters.yaml \
--task ${SYS} --seed 2 \
@@ -1,9 +1,9 @@
safety_filter: linear_mpsc
sf_config:
# LQR controller parameters
r_lin:
r_mpc:
- 0.1
q_lin:
q_mpc:
- 1
- 1
- 1
4 changes: 2 additions & 2 deletions examples/mpsc/config_overrides/cartpole/lqr_cartpole.yaml
@@ -1,9 +1,9 @@
algo: lqr
algo_config:
# Cost parameters
r_lin:
r_mpc:
- 0.1
q_lin:
q_mpc:
- 1
- 1
- 1
@@ -1,9 +1,9 @@
safety_filter: linear_mpsc
sf_config:
# LQR controller parameters
r_lin:
r_mpc:
- 0.1
q_lin:
q_mpc:
- 1
- 1
- 1
@@ -1,9 +1,9 @@
algo: lqr
algo_config:
# Cost parameters
r_lin:
r_mpc:
- 0.1
q_lin:
q_mpc:
- 1
- 1
- 1
@@ -1,9 +1,9 @@
safety_filter: linear_mpsc
sf_config:
# LQR controller parameters
r_lin:
r_mpc:
- 0.1
q_lin:
q_mpc:
- 0.01
- 0.01
- 0.01
4 changes: 2 additions & 2 deletions examples/rl/rl_experiment.sh
@@ -39,6 +39,6 @@ do
task_config.task_info.ilqr_ref=False \
task_config.task_info.ilqr_traj_data='../lqr/ilqr_ref_traj.npy' \
task_config.noise_scale=${NS}
# --pretrain_path ./Results/Benchmark_data/ilqr_ref/${SYS}_${ALGO}_data/${SEED}
# --pretrain_path ./results/benchmark_data/ilqr_ref/${SYS}_${ALGO}_data/${SEED}
done
#done
#done
2 changes: 1 addition & 1 deletion examples/rl/train_rl_model.sh
@@ -54,7 +54,7 @@ do
--overrides \
./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \
./config_overrides/${SYS}/${SYS}_${TASK}.yaml \
--output_dir ./Results/${EXP_NAME}/${SYS}_${ALGO}_data/${SEED}/ \
--output_dir ./results/${EXP_NAME}/${SYS}_${ALGO}_data/${SEED}/ \
--seed ${SEED} \
--use_gpu \
--kv_overrides \
@@ -0,0 +1,19 @@
algo: mpc_acados
algo_config:
horizon: 25
q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001]
r_mpc: [3., 3.]
prior_info:
prior_prop:
beta_1: 18.11298
beta_2: 3.6800
beta_3: 0
alpha_1: -140.8
alpha_2: -13.4
alpha_3: 124.8
randomize_prior_prop: False
prior_prop_rand_info: null
warmstart: True
# soft_constraints: True
# use_RTI: True
output_dir: ./mpc_acados/results
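This new override file carries the same mpc_acados settings that the earlier config diff trimmed down. A sketch of how mb_experiment.py above would pick it up through its sys.argv pattern; the SYS/TASK/PRIOR values here are hypothetical placeholders, not taken from this commit:

# Sketch: loading the new mpc_acados override via the sys.argv pattern
# from mb_experiment.py (SYS/TASK/PRIOR values are assumptions).
import sys

ALGO, SYS, TASK, PRIOR = 'mpc_acados', 'quadrotor', 'tracking', '100'
sys.argv[1:] = ['--algo', ALGO,
                '--task', 'quadrotor',
                '--overrides',
                f'./config_overrides/{SYS}_{TASK}.yaml',
                f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml',
                '--seed', '9',
                '--output_dir', f'./{ALGO}/results']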