diff --git a/.gitignore b/.gitignore index 49e2bcb3f..31805cfd4 100644 --- a/.gitignore +++ b/.gitignore @@ -6,11 +6,13 @@ examples/mpsc/unsafe_rl_temp_data/ # examples/pid/*data/ # -Results/ +experiments/mpsc/results* +experiments/mpsc/models/rl_models* +# results/ z_docstring.py TODOs.md -# +# hpo_study*/ hp_study*/ comparisons/ @@ -155,10 +157,4 @@ dmypy.json .idea/ *c_generated_code/ -acados_ocp_nlp.json -gpmpc_acados_ocp_solver.json -gpmpc_update/ -temp -models/ -benchmarking_sim/quadrotor/acados_ocp.json -acados_ocp.json \ No newline at end of file +*acados_ocp*.json diff --git a/benchmarking_sim/cartpole/config_overrides/linear_mpsc_cartpole_stab_100.yaml b/benchmarking_sim/cartpole/config_overrides/linear_mpsc_cartpole_stab_100.yaml index 9485bf733..ed2df1ded 100644 --- a/benchmarking_sim/cartpole/config_overrides/linear_mpsc_cartpole_stab_100.yaml +++ b/benchmarking_sim/cartpole/config_overrides/linear_mpsc_cartpole_stab_100.yaml @@ -1,9 +1,9 @@ safety_filter: linear_mpsc sf_config: # LQR controller parameters - r_lin: + r_mpc: - 0.1 - q_lin: + q_mpc: - 1 - 0.1 - 1 diff --git a/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml b/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml index 43ab37420..f97365baf 100644 --- a/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml +++ b/benchmarking_sim/quadrotor/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking_100.yaml @@ -1,35 +1,17 @@ algo: mpc_acados algo_config: - # horizon: 40 - # r_mpc: - # - 0.8 - # - 0.8 - # q_mpc: - # - 5.0 - # - 0.1 - # - 5.0 - # - 0.1 - # - 0.5 - # - 0.001 horizon: 25 q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001] r_mpc: [3., 3.] - # Prior info prior_info: - # prior_prop: null - prior_prop: + prior_prop: beta_1: 18.11298 beta_2: 3.6800 beta_3: 0 alpha_1: -140.8 alpha_2: -13.4 alpha_3: 124.8 - # pitch_bias: 0.0 # in radian randomize_prior_prop: False prior_prop_rand_info: null warmstart: True - # soft_constraints: True - # use_RTI: True output_dir: ./mpc_acados/results - - diff --git a/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml b/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml index 52a6d9405..17da515a5 100644 --- a/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml +++ b/benchmarking_sim/quadrotor/config_overrides/quadrotor_2D_attitude_tracking.yaml @@ -69,6 +69,8 @@ task_config: constraints: - constraint_form: default_constraint constrained_variable: state + upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3] + lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3] - constraint_form: default_constraint constrained_variable: input upper_bounds: [0.58212, 0.7] @@ -84,4 +86,4 @@ task_config: # mask: [1, 0, 0, 0] observation: - disturbance_func: white_noise - std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.5e-03] \ No newline at end of file + std: [5.6e-05, 1.5e-02, 2.9e-05, 8.0e-03, 1.3e-03, 3.5e-03] diff --git a/benchmarking_sim/quadrotor/mb_experiment.py b/benchmarking_sim/quadrotor/mb_experiment.py index a8b549817..6bbcf8a46 100644 --- a/benchmarking_sim/quadrotor/mb_experiment.py +++ b/benchmarking_sim/quadrotor/mb_experiment.py @@ -1,7 +1,7 @@ import os -import sys import pickle +import sys from collections import defaultdict from functools import partial @@ -10,16 +10,16 @@ from matplotlib.ticker import FormatStrFormatter from safe_control_gym.envs.benchmark_env import Task +from 
safe_control_gym.envs.gym_pybullet_drones.quadrotor import Quadrotor from safe_control_gym.experiments.base_experiment import BaseExperiment -from safe_control_gym.experiments.epoch_experiments import EpochExperiment from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.gpmpc_plotting import make_quad_plots from safe_control_gym.utils.registration import make from safe_control_gym.utils.utils import mkdirs, set_dir_from_config, timing -from safe_control_gym.envs.gym_pybullet_drones.quadrotor import Quadrotor -from safe_control_gym.utils.gpmpc_plotting import make_quad_plots script_path = os.path.dirname(os.path.realpath(__file__)) + @timing def run(gui=False, n_episodes=1, n_steps=None, save_data=True): '''The main function running experiments for model-based methods. @@ -36,9 +36,9 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): else: # ALGO = 'ilqr' # ALGO = 'gp_mpc' - ALGO = 'gpmpc_acados' + # ALGO = 'gpmpc_acados' # ALGO = 'mpc' - # ALGO = 'mpc_acados' + ALGO = 'mpc_acados' # ALGO = 'linear_mpc' # ALGO = 'lqr' # ALGO = 'lqr_c' @@ -59,28 +59,28 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): sys.argv[1:] = ['--algo', ALGO, '--task', agent, '--overrides', - f'./config_overrides/{SYS}_{TASK}.yaml', - f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', + f'./config_overrides/{SYS}_{TASK}.yaml', + f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', '--seed', '9', '--use_gpu', 'True', '--output_dir', f'./{ALGO}/results', - ] + ] else: - MPSC_COST='one_step_cost' + MPSC_COST = 'one_step_cost' assert ALGO != 'gp_mpc', 'Safety filter not supported for gp_mpc' assert os.path.exists(f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml'), f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml does not exist' sys.argv[1:] = ['--algo', ALGO, '--task', agent, '--safety_filter', SAFETY_FILTER, '--overrides', - f'./config_overrides/{SYS}_{TASK}.yaml', - f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', - f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml', + f'./config_overrides/{SYS}_{TASK}.yaml', + f'./config_overrides/{ALGO}_{SYS}_{TASK}_{PRIOR}.yaml', + f'./config_overrides/{SAFETY_FILTER}_{SYS}_{TASK}_{PRIOR}.yaml', '--kv_overrides', f'sf_config.cost_function={MPSC_COST}', '--seed', '2', '--use_gpu', 'True', '--output_dir', f'./{ALGO}/results', - ] + ] fac = ConfigFactory() fac.add_argument('--func', type=str, default='train', help='main function to run.') fac.add_argument('--n_episodes', type=int, default=1, help='number of episodes to run.') @@ -89,7 +89,7 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): if ALGO in ['gpmpc_acados', 'gp_mpc']: num_data_max = config.algo_config.num_epochs * config.algo_config.num_samples config.output_dir = os.path.join(config.output_dir, PRIOR + '_' + repr(num_data_max)) - print('output_dir', config.algo_config.output_dir) + print('output_dir', config.algo_config.output_dir) set_dir_from_config(config) config.algo_config.output_dir = config.output_dir mkdirs(config.output_dir) @@ -108,17 +108,17 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): seed=config.seed, **config.algo_config ) - + # Setup safety filter if SAFETY_FILTER is not None: env_func_filter = partial(make, - config.task, - seed=config.seed, - **config.task_config) + config.task, + seed=config.seed, + **config.task_config) safety_filter = make(config.safety_filter, - env_func_filter, - seed=config.seed, - **config.sf_config) + env_func_filter, + 
seed=config.seed, + **config.sf_config) safety_filter.reset() all_trajs = defaultdict(list) @@ -133,19 +133,19 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): static_train_env = env_func(gui=False, randomized_init=False, init_state=init_state) # Create experiment, train, and run evaluation - if SAFETY_FILTER is None: - if ALGO in ['gpmpc_acados', 'gp_mpc'] : + if SAFETY_FILTER is None: + if ALGO in ['gpmpc_acados', 'gp_mpc']: experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) if config.algo_config.num_epochs == 1: print('Evaluating prior controller') elif config.algo_config.gp_model_path is not None: ctrl.load(config.algo_config.gp_model_path) else: - # manually launch training + # manually launch training # (NOTE: not using launch_training method since calling plotting before eval will break the eval) experiment.reset() train_runs, test_runs = ctrl.learn(env=static_train_env) - else: + else: experiment = BaseExperiment(env=static_env, ctrl=ctrl, train_env=static_train_env) experiment.launch_training() else: @@ -165,14 +165,13 @@ def run(gui=False, n_episodes=1, n_steps=None, save_data=True): if ALGO in ['gpmpc_acados', 'gp_mpc'] and \ config.algo_config.gp_model_path is None and \ config.algo_config.num_epochs > 1: - if isinstance(static_env, Quadrotor): - make_quad_plots(test_runs=test_runs, - train_runs=train_runs, - trajectory=ctrl.traj.T, - dir=ctrl.output_dir) + if isinstance(static_env, Quadrotor): + make_quad_plots(test_runs=test_runs, + train_runs=train_runs, + trajectory=ctrl.traj.T, + dir=ctrl.output_dir) plot_quad_eval(trajs_data['obs'][0], trajs_data['action'][0], ctrl.env, config.output_dir) - # Close environments static_env.close() static_train_env.close() @@ -249,15 +248,15 @@ def plot_quad_eval(state_stack, input_stack, env, save_path=None): # plot the figure-eight _, axs = plt.subplots(1) - axs.plot(np.array(state_stack).transpose()[0, 0:plot_length], + axs.plot(np.array(state_stack).transpose()[0, 0:plot_length], np.array(state_stack).transpose()[2, 0:plot_length], label='actual') - axs.plot(reference.transpose()[0, 0:plot_length], + axs.plot(reference.transpose()[0, 0:plot_length], reference.transpose()[2, 0:plot_length], color='r', label='desired') axs.set_xlabel('x [m]') axs.set_ylabel('z [m]') axs.set_title('State path in x-z plane') axs.legend() - + if save_path is not None: plt.savefig(os.path.join(save_path, 'state_path.png')) print(f'Plots saved to {save_path}') diff --git a/examples/hpo/cartpole/config_overrides/linear_mpsc_cartpole_stab_100.yaml b/examples/hpo/cartpole/config_overrides/linear_mpsc_cartpole_stab_100.yaml index 9485bf733..ed2df1ded 100644 --- a/examples/hpo/cartpole/config_overrides/linear_mpsc_cartpole_stab_100.yaml +++ b/examples/hpo/cartpole/config_overrides/linear_mpsc_cartpole_stab_100.yaml @@ -1,9 +1,9 @@ safety_filter: linear_mpsc sf_config: # LQR controller parameters - r_lin: + r_mpc: - 0.1 - q_lin: + q_mpc: - 1 - 0.1 - 1 diff --git a/examples/hpo/train_rl_model.sh b/examples/hpo/train_rl_model.sh index 40be21d03..b3799c67d 100755 --- a/examples/hpo/train_rl_model.sh +++ b/examples/hpo/train_rl_model.sh @@ -32,7 +32,7 @@ python ./hpo_experiment.py \ --overrides \ ./rl/${ALGO}/config_overrides/${SYS}/${ALGO}_${SYS}.yaml \ ./rl/config_overrides/${SYS}/${SYS}_${TASK}.yaml \ - --output_dir ./Results/${SYS}_${ALGO}_data/ \ + --output_dir ./results/${SYS}_${ALGO}_data/ \ --tag Test \ --opt_hps ./rl/${ALGO}/config_overrides/${SYS}/optimized_hyperparameters.yaml \ --task ${SYS} --seed 2 \ diff 
--git a/examples/mpsc/config_overrides/cartpole/linear_mpsc_cartpole.yaml b/examples/mpsc/config_overrides/cartpole/linear_mpsc_cartpole.yaml index 7a538c0ec..0bc1572d7 100644 --- a/examples/mpsc/config_overrides/cartpole/linear_mpsc_cartpole.yaml +++ b/examples/mpsc/config_overrides/cartpole/linear_mpsc_cartpole.yaml @@ -1,9 +1,9 @@ safety_filter: linear_mpsc sf_config: # LQR controller parameters - r_lin: + r_mpc: - 0.1 - q_lin: + q_mpc: - 1 - 1 - 1 diff --git a/examples/mpsc/config_overrides/cartpole/lqr_cartpole.yaml b/examples/mpsc/config_overrides/cartpole/lqr_cartpole.yaml index f3dcfbd44..10e11a05e 100644 --- a/examples/mpsc/config_overrides/cartpole/lqr_cartpole.yaml +++ b/examples/mpsc/config_overrides/cartpole/lqr_cartpole.yaml @@ -1,9 +1,9 @@ algo: lqr algo_config: # Cost parameters - r_lin: + r_mpc: - 0.1 - q_lin: + q_mpc: - 1 - 1 - 1 diff --git a/examples/mpsc/config_overrides/quadrotor_2D/linear_mpsc_quadrotor_2D.yaml b/examples/mpsc/config_overrides/quadrotor_2D/linear_mpsc_quadrotor_2D.yaml index 476468a46..a98c88970 100644 --- a/examples/mpsc/config_overrides/quadrotor_2D/linear_mpsc_quadrotor_2D.yaml +++ b/examples/mpsc/config_overrides/quadrotor_2D/linear_mpsc_quadrotor_2D.yaml @@ -1,9 +1,9 @@ safety_filter: linear_mpsc sf_config: # LQR controller parameters - r_lin: + r_mpc: - 0.1 - q_lin: + q_mpc: - 1 - 1 - 1 diff --git a/examples/mpsc/config_overrides/quadrotor_2D/lqr_quadrotor_2D.yaml b/examples/mpsc/config_overrides/quadrotor_2D/lqr_quadrotor_2D.yaml index 3ff048ecf..bb776b073 100644 --- a/examples/mpsc/config_overrides/quadrotor_2D/lqr_quadrotor_2D.yaml +++ b/examples/mpsc/config_overrides/quadrotor_2D/lqr_quadrotor_2D.yaml @@ -1,9 +1,9 @@ algo: lqr algo_config: # Cost parameters - r_lin: + r_mpc: - 0.1 - q_lin: + q_mpc: - 1 - 1 - 1 diff --git a/examples/mpsc/config_overrides/quadrotor_3D/linear_mpsc_quadrotor_3D.yaml b/examples/mpsc/config_overrides/quadrotor_3D/linear_mpsc_quadrotor_3D.yaml index 1d0a2b8fa..de25250fd 100644 --- a/examples/mpsc/config_overrides/quadrotor_3D/linear_mpsc_quadrotor_3D.yaml +++ b/examples/mpsc/config_overrides/quadrotor_3D/linear_mpsc_quadrotor_3D.yaml @@ -1,9 +1,9 @@ safety_filter: linear_mpsc sf_config: # LQR controller parameters - r_lin: + r_mpc: - 0.1 - q_lin: + q_mpc: - 0.01 - 0.01 - 0.01 diff --git a/examples/rl/rl_experiment.sh b/examples/rl/rl_experiment.sh index 48a8f9c65..62d7a37b2 100755 --- a/examples/rl/rl_experiment.sh +++ b/examples/rl/rl_experiment.sh @@ -39,6 +39,6 @@ do task_config.task_info.ilqr_ref=False \ task_config.task_info.ilqr_traj_data='../lqr/ilqr_ref_traj.npy' \ task_config.noise_scale=${NS} -# --pretrain_path ./Results/Benchmark_data/ilqr_ref/${SYS}_${ALGO}_data/${SEED} +# --pretrain_path ./results/benchmark_data/ilqr_ref/${SYS}_${ALGO}_data/${SEED} done -#done \ No newline at end of file +#done diff --git a/examples/rl/train_rl_model.sh b/examples/rl/train_rl_model.sh index 8e74f590c..fddd121b1 100755 --- a/examples/rl/train_rl_model.sh +++ b/examples/rl/train_rl_model.sh @@ -54,7 +54,7 @@ do --overrides \ ./config_overrides/${SYS}/${ALGO}_${SYS}.yaml \ ./config_overrides/${SYS}/${SYS}_${TASK}.yaml \ - --output_dir ./Results/${EXP_NAME}/${SYS}_${ALGO}_data/${SEED}/ \ + --output_dir ./results/${EXP_NAME}/${SYS}_${ALGO}_data/${SEED}/ \ --seed ${SEED} \ --use_gpu \ --kv_overrides \ diff --git a/experiments/mpsc/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking.yaml b/experiments/mpsc/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking.yaml new file mode 100644 index 000000000..13a86602d 
--- /dev/null +++ b/experiments/mpsc/config_overrides/mpc_acados_quadrotor_2D_attitude_tracking.yaml @@ -0,0 +1,19 @@ +algo: mpc_acados +algo_config: + horizon: 25 + q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001] + r_mpc: [3., 3.] + prior_info: + prior_prop: + beta_1: 18.11298 + beta_2: 3.6800 + beta_3: 0 + alpha_1: -140.8 + alpha_2: -13.4 + alpha_3: 124.8 + randomize_prior_prop: False + prior_prop_rand_info: null + warmstart: True + # soft_constraints: True + # use_RTI: True + output_dir: ./mpc_acados/results diff --git a/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml b/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml new file mode 100644 index 000000000..1152caa0c --- /dev/null +++ b/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml @@ -0,0 +1,32 @@ +safety_filter: nl_mpsc +sf_config: + # LQR controller parameters + q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001] + r_mpc: [3., 3.] + + prior_info: + prior_prop: + beta_1: 18.11298 + beta_2: 3.6800 + beta_3: 0 + alpha_1: -140.8 + alpha_2: -13.4 + alpha_3: 124.8 + randomize_prior_prop: False + prior_prop_rand_info: null + + # MPC Parameters + use_acados: True + horizon: 25 + warmstart: True + integration_algo: rk4 + use_terminal_set: False + + # Cost function + cost_function: one_step_cost + mpsc_cost_horizon: 5 + decay_factor: 0.85 + + # Softening + soften_constraints: True + slack_cost: 0.1 diff --git a/experiments/mpsc/config_overrides/nl_mpsc_quadrotor_2D_attitude.yaml b/experiments/mpsc/config_overrides/nl_mpsc_quadrotor_2D_attitude.yaml new file mode 100644 index 000000000..3df801790 --- /dev/null +++ b/experiments/mpsc/config_overrides/nl_mpsc_quadrotor_2D_attitude.yaml @@ -0,0 +1,30 @@ +safety_filter: nl_mpsc +sf_config: + # LQR controller parameters + q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001] + r_mpc: [3., 3.] 
+ + # MPC Parameters + use_acados: True + horizon: 25 + warmstart: True + integration_algo: rk4 + use_terminal_set: False + + # Prior info + prior_info: + prior_prop: null + randomize_prior_prop: False + prior_prop_rand_info: null + + # Learning disturbance bounds + n_samples: 6000 + + # Cost function + cost_function: one_step_cost + mpsc_cost_horizon: 5 + decay_factor: 0.85 + + # Softening + soften_constraints: True + slack_cost: 250 diff --git a/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml b/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml new file mode 100644 index 000000000..8ccea4db1 --- /dev/null +++ b/experiments/mpsc/config_overrides/ppo_quadrotor_2D_attitude.yaml @@ -0,0 +1,39 @@ +algo: ppo +algo_config: + # model args + hidden_dim: 128 + activation: tanh + + # loss args + gamma: 0.98 + use_gae: True + gae_lambda: 0.92 + clip_param: 0.2 + target_kl: 1.0e-2 + entropy_coef: 0.01 + + # optim args + opt_epochs: 20 + mini_batch_size: 256 + actor_lr: 0.001 + critic_lr: 0.001 + + # runner args + max_env_steps: 396000 + rollout_batch_size: 1 + rollout_steps: 660 + eval_batch_size: 10 + + # misc + log_interval: 6600 + save_interval: 0 + num_checkpoints: 0 + eval_interval: 6600 + eval_save_best: True + tensorboard: False + + # safety filter + filter_train_actions: True + penalize_sf_diff: True + sf_penalty: 75 + use_safe_reset: True diff --git a/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml b/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml new file mode 100644 index 000000000..c9081cab0 --- /dev/null +++ b/experiments/mpsc/config_overrides/quadrotor_2D_attitude_tracking.yaml @@ -0,0 +1,89 @@ +task_config: + info_in_reset: True + ctrl_freq: 60 + pyb_freq: 60 + physics: dyn_si + quad_type: 4 + + init_state: + init_x: 0 + init_x_dot: 0 + init_z: 1.0 + init_z_dot: 0 + init_theta: 0 + init_theta_dot: 0 + randomized_init: True + randomized_inertial_prop: False + + init_state_randomization_info: + init_x: + distrib: 'uniform' + low: -0.05 + high: 0.05 + init_x_dot: + distrib: 'uniform' + low: -0.05 + high: 0.05 + init_z: + distrib: 'uniform' + low: -0.05 + high: 0.05 + init_z_dot: + distrib: 'uniform' + low: -0.05 + high: 0.05 + init_theta: + distrib: 'uniform' + low: -0.05 + high: 0.05 + init_theta_dot: + distrib: 'uniform' + low: -0.05 + high: 0.05 + + + task: traj_tracking + task_info: + trajectory_type: figure8 + num_cycles: 2 + trajectory_plane: 'xz' + trajectory_position_offset: [0, 1.] 
+ trajectory_scale: 1.0 + + inertial_prop: + M: 0.033 + Iyy: 1.4e-05 + beta_1: 18.11 + beta_2: 3.68 + beta_3: 0.0 + alpha_1: -140.8 + alpha_2: -13.4 + alpha_3: 124.8 + pitch_bias: 0.0 # in radian + + episode_len_sec: 11 + cost: rl_reward + obs_goal_horizon: 1 + + # RL Reward + rew_state_weight: [10., .0, 10., .0, .0, 0.0] + rew_act_weight: [.0, .0] + rew_exponential: True + + # disturbances: + # observation: + # - disturbance_func: white_noise + # std: [5.6e-05, 1.5e-04, 2.9e-05, 8.0e-04, 1.3e-04, 3.6e-04, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] + + constraints: + - constraint_form: default_constraint + constrained_variable: state + upper_bounds: [ 0.9, 2, 1.45, 2, 0.75, 3] + lower_bounds: [-0.9, -2, 0.55, -2, -0.75, -3] + - constraint_form: default_constraint + constrained_variable: input + upper_bounds: [0.58212, 0.4] + lower_bounds: [0.09702, -0.4] + + done_on_out_of_bound: True + done_on_violation: False diff --git a/experiments/mpsc/models/mpsc_parameters/linear_mpsc_quadrotor_2D_attitude.pkl b/experiments/mpsc/models/mpsc_parameters/linear_mpsc_quadrotor_2D_attitude.pkl new file mode 100644 index 000000000..cb616ada5 Binary files /dev/null and b/experiments/mpsc/models/mpsc_parameters/linear_mpsc_quadrotor_2D_attitude.pkl differ diff --git a/experiments/mpsc/models/mpsc_parameters/nl_mpsc_quadrotor_2D_attitude.pkl b/experiments/mpsc/models/mpsc_parameters/nl_mpsc_quadrotor_2D_attitude.pkl new file mode 100644 index 000000000..0c5896aa8 Binary files /dev/null and b/experiments/mpsc/models/mpsc_parameters/nl_mpsc_quadrotor_2D_attitude.pkl differ diff --git a/experiments/mpsc/mpsc_experiment.py b/experiments/mpsc/mpsc_experiment.py new file mode 100644 index 000000000..6edddf462 --- /dev/null +++ b/experiments/mpsc/mpsc_experiment.py @@ -0,0 +1,172 @@ +'''This script tests the MPSC safety filter implementation.''' + +import pickle +import shutil +from functools import partial + +import matplotlib.pyplot as plt +import numpy as np + +from safe_control_gym.envs.benchmark_env import Cost +from safe_control_gym.experiments.base_experiment import BaseExperiment, MetricExtractor +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function +from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.registration import make + + +def run(plot=False, training=False, model='ppo'): + '''Main function to run MPSC experiments. + + Returns: + X_GOAL (np.ndarray): The goal (stabilization or reference trajectory) of the experiment. + uncert_results (dict): The results of the uncertified experiment. + uncert_metrics (dict): The metrics of the uncertified experiment. + cert_results (dict): The results of the certified experiment. + cert_metrics (dict): The metrics of the certified experiment. + ''' + + # Create the configuration dictionary. + fac = ConfigFactory() + config = fac.merge() + config.algo_config['training'] = False + config.task_config['done_on_violation'] = False + config.task_config['randomized_init'] = False + if config.algo in ['ppo', 'sac']: + config.task_config['cost'] = Cost.RL_REWARD + config.task_config['normalized_rl_action_space'] = True + else: + config.task_config['cost'] = Cost.QUADRATIC + config.task_config['normalized_rl_action_space'] = False + + system = 'quadrotor_2D_attitude' + + # Create an environment + env_func = partial(make, + config.task, + **config.task_config) + env = env_func() + + # Setup controller. 
+ ctrl = make(config.algo, + env_func, + **config.algo_config, + output_dir='./temp') + + if config.algo in ['ppo', 'sac']: + # Load state_dict from trained model. + ctrl.load(f'./models/rl_models/{model}/model_latest.pt') + + # Remove temporary files and directories + shutil.rmtree('./temp', ignore_errors=True) + + # Run without safety filter + experiment = BaseExperiment(env, ctrl) + uncert_results, uncert_metrics = experiment.run_evaluation(n_episodes=1) + ctrl.reset() + + # Setup MPSC. + safety_filter = make(config.safety_filter, + env_func, + **config.sf_config) + safety_filter.reset() + + if config.sf_config.cost_function == Cost_Function.PRECOMPUTED_COST: + safety_filter.cost_function.uncertified_controller = ctrl + safety_filter.cost_function.output_dir = '.' + + if training is True: + train_env = env_func(randomized_init=True, + init_state=None, + normalized_rl_action_space=False, + ) + safety_filter.learn(env=train_env) + safety_filter.save(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') + # raise SystemExit + else: + safety_filter.load(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') + + # Run with safety filter + experiment = BaseExperiment(env, ctrl, safety_filter=safety_filter) + cert_results, cert_metrics = experiment.run_evaluation(n_episodes=1) + experiment.close() + safety_filter.close() + + if plot is True: + fig = plt.figure() + ax = fig.add_subplot(1, 1, 1) + ax.plot(uncert_results['state'][0][:, 0], uncert_results['state'][0][:, 2], label='Uncertified', color='red') + ax.plot(cert_results['state'][0][:, 0], cert_results['state'][0][:, 2], label='Certified', color='green') + ax.plot(env.X_GOAL[:, 0], env.X_GOAL[:, 2], label='Reference', color='black', linestyle='dashdot') + rec1 = plt.Rectangle((0.9, 0), 2, 2, color='#f1d6d6') + rec2 = plt.Rectangle((-1.9, 0), 1, 2, color='#f1d6d6') + ax.add_patch(rec1) + ax.add_patch(rec2) + rec3 = plt.Rectangle((-0.9, 1.45), 0.975 * 2, 2, color='#f1d6d6') + rec4 = plt.Rectangle((-0.9, -0.45), 0.975 * 2, 1, color='#f1d6d6') + ax.add_patch(rec3) + ax.add_patch(rec4) + plt.xlim(-1.1, 1.1) + plt.ylim(0.45, 1.55) + plt.xlabel('x [m]') + plt.ylabel('z [m]') + plt.legend() + plt.show() + + elapsed_time_uncert = uncert_results['timestamp'][0][-1] - uncert_results['timestamp'][0][0] + elapsed_time_cert = cert_results['timestamp'][0][-1] - cert_results['timestamp'][0][0] + + mpsc_results = cert_results['safety_filter_data'][0] + corrections = mpsc_results['correction'][0] * 10.0 > np.linalg.norm(cert_results['current_physical_action'][0] - safety_filter.U_EQ[0], axis=1) + corrections = np.append(corrections, False) + + print('Total Uncertified Time (s):', elapsed_time_uncert) + print('Total Certified Time (s):', elapsed_time_cert) + print('Number of Corrections:', np.sum(corrections)) + print('Magnitude of Corrections:', np.linalg.norm(mpsc_results['correction'][0])) + print('Max Correction:', np.max(np.abs(mpsc_results['correction'][0]))) + print('Number of Feasible Iterations:', np.sum(mpsc_results['feasible'][0])) + print('Total Number of Iterations:', uncert_metrics['average_length']) + print('Total Number of Certified Iterations:', cert_metrics['average_length']) + print('Number of Violations:', uncert_metrics['average_constraint_violation']) + print('Number of Certified Violations:', cert_metrics['average_constraint_violation']) + + return env.X_GOAL, uncert_results, uncert_metrics, cert_results, cert_metrics + + +def run_multiple_models(plot, all_models): + '''Runs all models at every saved starting point.''' +
+ fac = ConfigFactory() + config = fac.merge() + + for model in all_models: + print(model) + for i in range(1): + X_GOAL, uncert_results, _, cert_results, _ = run(plot=plot, training=False, model=model) + if i == 0: + all_uncert_results, all_cert_results = uncert_results, cert_results + else: + for key in all_cert_results.keys(): + if key in all_uncert_results: + all_uncert_results[key].append(uncert_results[key][0]) + all_cert_results[key].append(cert_results[key][0]) + + met = MetricExtractor() + uncert_metrics = met.compute_metrics(data=all_uncert_results) + cert_metrics = met.compute_metrics(data=all_cert_results) + + all_results = {'uncert_results': all_uncert_results, + 'uncert_metrics': uncert_metrics, + 'cert_results': all_cert_results, + 'cert_metrics': cert_metrics, + 'config': config, + 'X_GOAL': X_GOAL} + + if not plot: + with open(f'./results_mpsc/{model}.pkl', 'wb') as f: + pickle.dump(all_results, f) + + +if __name__ == '__main__': + # run(plot=True, training=False, model='ppo') + run_multiple_models(plot=True, all_models=['none', 'mpsf']) diff --git a/experiments/mpsc/mpsc_experiment.sh b/experiments/mpsc/mpsc_experiment.sh new file mode 100755 index 000000000..678584020 --- /dev/null +++ b/experiments/mpsc/mpsc_experiment.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +SYS='quadrotor_2D_attitude' +TASK='tracking' +ALGO='ppo' + +SAFETY_FILTER='nl_mpsc' +# SAFETY_FILTER='mpsc_acados' +# MPSC_COST='precomputed_cost' +MPSC_COST='one_step_cost' +MPSC_COST_HORIZON=25 +DECAY_FACTOR=0.9 + +python3 ./mpsc_experiment.py \ + --task quadrotor \ + --algo ${ALGO} \ + --safety_filter ${SAFETY_FILTER} \ + --overrides \ + ./config_overrides/${SYS}_${TASK}.yaml \ + ./config_overrides/${ALGO}_${SYS}.yaml \ + ./config_overrides/${SAFETY_FILTER}_${SYS}.yaml \ + --kv_overrides \ + sf_config.cost_function=${MPSC_COST} \ + sf_config.mpsc_cost_horizon=${MPSC_COST_HORIZON} \ + sf_config.decay_factor=${DECAY_FACTOR} diff --git a/experiments/mpsc/plotting_results.py b/experiments/mpsc/plotting_results.py new file mode 100644 index 000000000..15a8986d0 --- /dev/null +++ b/experiments/mpsc/plotting_results.py @@ -0,0 +1,501 @@ +'''This script analyzes and plots the results from MPSC experiments.''' + +import pickle +import sys + +import matplotlib.pyplot as plt +import numpy as np + +from safe_control_gym.experiments.base_experiment import MetricExtractor +from safe_control_gym.safety_filters.mpsc.mpsc_utils import get_discrete_derivative +from safe_control_gym.utils.plotting import load_from_logs + +plot = True # Saves figure if False + +U_EQ = np.array([0.3, 0]) + +met = MetricExtractor() +met.verbose = False + + +def load_all_models(system, task, algo): + '''Loads the results of every experiment. + + Args: + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted. + + Returns: + all_results (dict): A dictionary containing all the results. + ''' + + all_results = {} + + for model in ordered_models: + with open(f'./results_mpsc/{model}.pkl', 'rb') as f: + all_results[model] = pickle.load(f) + + return all_results + + +def extract_magnitude_of_corrections(results_data): + '''Extracts the magnitude of corrections from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + + Returns: + magn_of_corrections (list): The list of magnitude of corrections for all experiments. 
''' + + magn_of_corrections = [np.linalg.norm(mpsc_results['correction'][0]) for mpsc_results in results_data['cert_results']['safety_filter_data']] + return magn_of_corrections + + +def extract_max_correction(results_data): + '''Extracts the max correction from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + + Returns: + max_corrections (list): The list of max corrections for all experiments. + ''' + max_corrections = [np.max(np.abs(mpsc_results['correction'][0])) for mpsc_results in results_data['cert_results']['safety_filter_data']] + + return max_corrections + + +def extract_number_of_corrections(results_data): + '''Extracts the number of corrections from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + + Returns: + num_corrections (list): The list of the number of corrections for all experiments. + ''' + num_corrections = [np.sum(mpsc_results['correction'][0] * 10.0 > np.linalg.norm(results_data['cert_results']['current_clipped_action'][i] - U_EQ, axis=1)) for i, mpsc_results in enumerate(results_data['cert_results']['safety_filter_data'])] + return num_corrections + + +def extract_feasible_iterations(results_data): + '''Extracts the number of feasible iterations from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + + Returns: + feasible_iterations (list): The list of the number of feasible iterations for all experiments. + ''' + feasible_iterations = [np.sum(mpsc_results['feasible'][0]) for mpsc_results in results_data['cert_results']['safety_filter_data']] + return feasible_iterations + + +def extract_rmse(results_data, certified=True): + '''Extracts the RMSEs from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + + Returns: + rmse (list): The list of RMSEs for all experiments. + ''' + if certified: + met.data = results_data['cert_results'] + rmse = np.asarray(met.get_episode_rmse()) + else: + met.data = results_data['uncert_results'] + rmse = np.asarray(met.get_episode_rmse()) + return rmse + + +def extract_length(results_data, certified=True): + '''Extracts the lengths from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + + Returns: + length (list): The list of lengths for all experiments. + ''' + if certified: + met.data = results_data['cert_results'] + length = np.asarray(met.get_episode_lengths()) + else: + met.data = results_data['uncert_results'] + length = np.asarray(met.get_episode_lengths()) + return length + + +def extract_simulation_time(results_data, certified=True): + '''Extracts the simulation time from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + certified (bool): Whether to extract the certified data or uncertified data. + + Returns: + sim_time (list): The list of simulation times for all experiments. + ''' + if certified: + sim_time = [timestamp[-1] - timestamp[0] for timestamp in results_data['cert_results']['timestamp']] + else: + sim_time = [timestamp[-1] - timestamp[0] for timestamp in results_data['uncert_results']['timestamp']] + + return sim_time + + +def extract_constraint_violations(results_data, certified=True): + '''Extracts the number of constraint violations from an experiment's data.
+ + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + certified (bool): Whether to extract the certified data or uncertified data. + + Returns: + num_violations (list): The list of number of constraint violations for all experiments. + ''' + if certified: + met.data = results_data['cert_results'] + num_violations = np.asarray(met.get_episode_constraint_violation_steps()) + else: + met.data = results_data['uncert_results'] + num_violations = np.asarray(met.get_episode_constraint_violation_steps()) + + return num_violations + + +def extract_rate_of_change(results_data, certified=True, order=1, mode='input'): + '''Extracts the rate of change of a signal from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + certified (bool): Whether to extract the certified data or uncertified data. + order (int): Either 1 or 2, denoting the order of the derivative. + mode (string): Either 'input' or 'correction', denoting which signal to use. + + Returns: + roc (list): The list of rates of change. + ''' + n = min(results_data['cert_results']['current_clipped_action'][0].shape) + + if mode == 'input': + if certified: + all_signals = [actions - U_EQ for actions in results_data['cert_results']['current_clipped_action']] + else: + all_signals = [actions - U_EQ for actions in results_data['uncert_results']['current_clipped_action']] + elif mode == 'correction': + all_signals = [np.squeeze(mpsc_results['uncertified_action'][0]) - np.squeeze(mpsc_results['certified_action'][0]) for mpsc_results in results_data['cert_results']['safety_filter_data']] + + total_derivatives = [] + for signal in all_signals: + if n == 1: + ctrl_freq = 15 + if mode == 'correction': + signal = np.atleast_2d(signal).T + elif n > 1: + ctrl_freq = 50 + derivative = get_discrete_derivative(signal, ctrl_freq) + if order == 2: + derivative = get_discrete_derivative(derivative, ctrl_freq) + total_derivatives.append(np.linalg.norm(derivative, 'fro')) + + return total_derivatives + + +def extract_reward(results_data, certified): + '''Extracts the episode returns from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + certified (bool): Whether to extract the certified data or uncertified data. + + Returns: + returns (list): The list of episode returns. + ''' + if certified: + met.data = results_data['cert_results'] + returns = np.asarray(met.get_episode_returns()) + else: + met.data = results_data['uncert_results'] + returns = np.asarray(met.get_episode_returns()) + + return returns + + +def extract_failed(results_data, certified): + '''Extracts the fraction of failed episodes from an experiment's data. + + Args: + results_data (dict): A dictionary containing all the data from the desired experiment. + certified (bool): Whether to extract the certified data or uncertified data. + + Returns: + failed (list): The fraction of failed episodes. + ''' + if certified: + data = results_data['cert_results'] + else: + data = results_data['uncert_results'] + + failed = [data['info'][i][-1]['out_of_bounds'] for i in range(len(data['info']))] + + return [np.mean(failed)] + + +def plot_model_comparisons(system, task, algo, data_extractor): + '''Plots the desired metric of every controller for a specific experiment. + + Args: + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted.
+ data_extractor (func): The function which extracts the desired data. + ''' + + all_results = load_all_models(system, task, algo) + + fig = plt.figure(figsize=(16.0, 10.0)) + ax = fig.add_subplot(111) + + labels = ordered_models + + data = [] + + for model in ordered_models: + exp_data = all_results[model] + data.append(data_extractor(exp_data)) + + ylabel = data_extractor.__name__.replace('extract_', '').replace('_', ' ').title() + ax.set_ylabel(ylabel, weight='bold', fontsize=45, labelpad=10) + + x = np.arange(1, len(labels) + 1) + ax.set_xticks(x, labels, weight='bold', fontsize=15, rotation=30, ha='right') + + medianprops = dict(linestyle='--', linewidth=2.5, color='black') + bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False) + + for patch, color in zip(bplot['boxes'], colors.values()): + patch.set_facecolor(color) + + fig.tight_layout() + ax.set_ylim(ymin=0) + + ax.yaxis.grid(True) + + if plot is True: + plt.show() + else: + image_suffix = data_extractor.__name__.replace('extract_', '') + fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300) + plt.close() + + +def normalize_actions(actions): + '''Normalizes an array of actions. + + Args: + actions (ndarray): The actions to be normalized. + + Returns: + normalized_actions (ndarray): The normalized actions. + ''' + if system_name == 'cartpole': + action_scale = 10.0 + normalized_actions = actions / action_scale + elif system_name == 'quadrotor_2D': + hover_thrust = 0.1323 + norm_act_scale = 0.1 + normalized_actions = (actions / hover_thrust - 1.0) / norm_act_scale + else: + hover_thrust = 0.06615 + norm_act_scale = 0.1 + normalized_actions = (actions / hover_thrust - 1.0) / norm_act_scale + + return normalized_actions + + +def plot_all_logs(system, task, algo): + '''Plots comparative plots of all the logs. + + Args: + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted. + ''' + all_results = {} + + for model in ordered_models: + all_results[model] = [] + all_results[model].append(load_from_logs(f'./models/rl_models/{model}/logs/')) + + for key in all_results[ordered_models[0]][0].keys(): + if key == 'stat_eval/ep_return': + plot_log(key, all_results) + if key == 'stat/constraint_violation': + plot_log(key, all_results) + + +def plot_log(key, all_results): + '''Plots a comparative plot of the log 'key'. + + Args: + key (str): The name of the log to be plotted. + all_results (dict): A dictionary of all the logged results for all models. 
+ ''' + fig = plt.figure(figsize=(16.0, 10.0)) + ax = fig.add_subplot(111) + + labels = ordered_models + + for model, label in zip(ordered_models, labels): + x = all_results[model][0][key][1] / 1000 + all_data = np.array([values[key][3] for values in all_results[model]]) + ax.plot(x, np.mean(all_data, axis=0), label=label, color=colors[model]) + ax.fill_between(x, np.min(all_data, axis=0), np.max(all_data, axis=0), alpha=0.3, edgecolor=colors[model], facecolor=colors[model]) + + ax.set_ylabel(key, weight='bold', fontsize=45, labelpad=10) + ax.set_xlabel('Training Episodes') + ax.legend() + + fig.tight_layout() + ax.yaxis.grid(True) + + if plot is True: + plt.show() + else: + image_suffix = key.replace('/', '__') + fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300) + plt.close() + + +def benchmark_plot(system, task, algo): + all_results = load_all_models(system, task, algo) + X_GOAL = all_results['none']['X_GOAL'] + + uncert = all_results['none']['uncert_results'] + mpsf = all_results['True']['cert_results'] + none = all_results['False']['cert_results'] + for i in [1, 5]: + print('Uncert') + met.data = uncert + print('num_violations', calculate_state_violations(uncert, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + + print('\nNone') + met.data = none + print('num_violations', calculate_state_violations(none, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + + print('\nMPSF') + met.data = mpsf + print('num_violations', calculate_state_violations(mpsf, i)) + print('exp_return', np.asarray(met.get_episode_returns())[i]) + print('---------') + + fig = plt.figure() + ax = fig.add_subplot(1, 1, 1) + ax.plot(uncert['state'][i][:, 0], uncert['state'][i][:, 2], label='Uncertified', color='red') + ax.plot(none['state'][i][:, 0], none['state'][i][:, 2], label='Certified (Std.)', color='cornflowerblue') + ax.plot(mpsf['state'][i][:, 0], mpsf['state'][i][:, 2], label='Certified (Ours)', color='forestgreen') + ax.plot(X_GOAL[:, 0], X_GOAL[:, 2], label='Reference', color='black', linestyle='dashdot') + rec1 = plt.Rectangle((0.9, 0), 2, 2, color='#f1d6d6') + rec2 = plt.Rectangle((-1.9, 0), 1, 2, color='#f1d6d6') + ax.add_patch(rec1) + ax.add_patch(rec2) + rec3 = plt.Rectangle((-0.9, 1.45), 0.975 * 2, 2, color='#f1d6d6') + rec4 = plt.Rectangle((-0.9, -0.45), 0.975 * 2, 1, color='#f1d6d6') + ax.add_patch(rec3) + ax.add_patch(rec4) + plt.xlim(-1.1, 1.1) + plt.ylim(0.45, 1.55) + plt.xlabel('x [m]') + plt.ylabel('z [m]') + plt.legend() + plt.show() + + +def calculate_state_violations(data, i): + states = data['state'][i] + num_viols = np.sum(np.any(states[:, [0, 2]] > [0.9, 1.45], axis=1) | np.any(states[:, [0, 2]] < [-0.9, 0.55], axis=1)) + return num_viols + + +if __name__ == '__main__': + ordered_models = ['mpsf', 'True', 'ppo', 'none', 'False'] + + colors = { + 'mpsf': 'royalblue', + 'True': 'cornflowerblue', + 'ppo': 'forestgreen', + 'none': 'plum', + 'False': 'violet', + } + + def extract_rate_of_change_of_inputs(results_data, certified=True): + return extract_rate_of_change(results_data, certified, order=1, mode='input') + + def extract_roc_cert(results_data, certified=True): + return extract_rate_of_change_of_inputs(results_data, certified) + + def extract_roc_uncert(results_data, certified=False): + return extract_rate_of_change_of_inputs(results_data, certified) + + def extract_rmse_cert(results_data, certified=True): + return extract_rmse(results_data, certified) + + def extract_rmse_uncert(results_data, certified=False): + return 
extract_rmse(results_data, certified) + + def extract_constraint_violations_cert(results_data, certified=True): + return extract_constraint_violations(results_data, certified) + + def extract_constraint_violations_uncert(results_data, certified=False): + return extract_constraint_violations(results_data, certified) + + def extract_reward_cert(results_data, certified=True): + return extract_reward(results_data, certified) + + def extract_reward_uncert(results_data, certified=False): + return extract_reward(results_data, certified) + + def extract_failed_cert(results_data, certified=True): + return extract_failed(results_data, certified) + + def extract_failed_uncert(results_data, certified=False): + return extract_failed(results_data, certified) + + def extract_length_cert(results_data, certified=True): + return extract_length(results_data, certified) + + def extract_length_uncert(results_data, certified=False): + return extract_length(results_data, certified) + + system_name = 'quadrotor_2D_attitude' + task_name = 'track' + algo_name = 'ppo' + if len(sys.argv) == 4: + system_name = sys.argv[1] + task_name = sys.argv[2] + algo_name = sys.argv[3] + + benchmark_plot(system_name, task_name, algo_name) + # plot_all_logs(system_name, task_name, algo_name) + # plot_model_comparisons(system_name, task_name, algo_name, extract_magnitude_of_corrections) + # plot_model_comparisons(system_name, task_name, algo_name, extract_max_correction) + # plot_model_comparisons(system_name, task_name, algo_name, extract_roc_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_roc_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_rmse_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_rmse_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_constraint_violations_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_constraint_violations_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_number_of_corrections) + # plot_model_comparisons(system_name, task_name, algo_name, extract_length_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_length_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_reward_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_reward_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_failed_cert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_failed_uncert) + # plot_model_comparisons(system_name, task_name, algo_name, extract_feasible_iterations) diff --git a/experiments/mpsc/train_model.sh b/experiments/mpsc/train_model.sh new file mode 100755 index 000000000..7ea77edfe --- /dev/null +++ b/experiments/mpsc/train_model.sh @@ -0,0 +1,35 @@ +#!/bin/bash + +SYS='quadrotor_2D_attitude' +TASK='tracking' +ALGO='ppo' + +SAFETY_FILTER='nl_mpsc' +MPSC_COST='one_step_cost' +FILTER=False +SF_PEN=1 + +if [ "$FILTER" == 'True' ]; then + TAG=mpsf4 +else + TAG=none4 +fi + +# Train the unsafe controller/agent. 
+python3 train_rl.py \ + --task quadrotor \ + --algo ${ALGO} \ + --safety_filter ${SAFETY_FILTER} \ + --overrides \ + ./config_overrides/${SYS}_${TASK}.yaml \ + ./config_overrides/${ALGO}_${SYS}.yaml \ + ./config_overrides/${SAFETY_FILTER}_${SYS}.yaml \ + --output_dir ./models/rl_models/${TAG}/ \ + --seed 2 \ + --kv_overrides \ + sf_config.cost_function=${MPSC_COST} \ + sf_config.soften_constraints=True \ + algo_config.filter_train_actions=${FILTER} \ + algo_config.use_safe_reset=${FILTER} \ + algo_config.penalize_sf_diff=${FILTER} \ + algo_config.sf_penalty=${SF_PEN} \ diff --git a/experiments/mpsc/train_rl.py b/experiments/mpsc/train_rl.py new file mode 100644 index 000000000..ace83d3d8 --- /dev/null +++ b/experiments/mpsc/train_rl.py @@ -0,0 +1,96 @@ +'''Template training/plotting/testing script.''' + +import os +import shutil +import time +from functools import partial + +import munch +import yaml + +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function +from safe_control_gym.utils.configuration import ConfigFactory +from safe_control_gym.utils.plotting import plot_from_logs +from safe_control_gym.utils.registration import make +from safe_control_gym.utils.utils import mkdirs, set_device_from_config, set_seed_from_config + + +def train(): + '''Training template. + + TODO: Add restore functionality + ''' + # Create the configuration dictionary. + fac = ConfigFactory() + config = fac.merge() + config.algo_config['training'] = True + + shutil.rmtree(config.output_dir, ignore_errors=True) + + system = 'quadrotor_2D_attitude' + + set_seed_from_config(config) + set_device_from_config(config) + + # Define function to create task/env. + env_func = partial(make, + config.task, + output_dir=config.output_dir, + **config.task_config + ) + + # Create the controller/control_agent. + ctrl = make(config.algo, + env_func, + checkpoint_path=os.path.join(config.output_dir, 'model_latest.pt'), + output_dir=config.output_dir, + **config.algo_config) + ctrl.reset() + + # Setup MPSC. + if config.algo in ['ppo', 'sac']: + safety_filter = make(config.safety_filter, + env_func, + **config.sf_config) + safety_filter.reset() + + if config.sf_config.cost_function == Cost_Function.PRECOMPUTED_COST: + safety_filter.cost_function.uncertified_controller = ctrl + safety_filter.cost_function.output_dir = '.' + if config.algo == 'pid': + ctrl.save('./temp-data/saved_controller_prev.npy') + + safety_filter.load(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') + + ctrl.safety_filter = safety_filter + + # Training. + start_time = time.time() + ctrl.learn() + config['logging'] = {'total_learning_time': time.time() - start_time} + ctrl.close() + print('Training done.') + + with open(os.path.join(config.output_dir, 'config.yaml'), 'w', encoding='UTF-8') as file: + yaml.dump(munch.unmunchify(config), file, default_flow_style=False) + + make_plots(config) + + +def make_plots(config): + '''Produces plots for logged stats during training. + Usage + * use with `--func plot` and `--restore {dir_path}` where `dir_path` is + the experiment folder containing the logs. + * save figures under `dir_path/plots/`. + ''' + # Define source and target log locations. 
+ log_dir = os.path.join(config.output_dir, 'logs') + plot_dir = os.path.join(config.output_dir, 'plots') + mkdirs(plot_dir) + plot_from_logs(log_dir, plot_dir, window=3) + print('Plotting done.') + + +if __name__ == '__main__': + train() diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py index 2be2b2488..d19e16ae5 100644 --- a/safe_control_gym/controllers/ppo/ppo.py +++ b/safe_control_gym/controllers/ppo/ppo.py @@ -1,4 +1,4 @@ -"""Proximal Policy Optimization (PPO) +'''Proximal Policy Optimization (PPO) Based on: * https://github.com/openai/spinningup/blob/master/spinup/algos/pytorch/ppo/ppo.py @@ -10,7 +10,7 @@ * pytorch-a2c-ppo-acktr-gail - https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail * openai spinning up - ppo - https://github.com/openai/spinningup/tree/master/spinup/algos/pytorch/ppo * stable baselines3 - ppo - https://github.com/DLR-RM/stable-baselines3/tree/master/stable_baselines3/ppo -""" +''' import os import time @@ -30,7 +30,7 @@ class PPO(BaseController): - """Proximal policy optimization.""" + '''Proximal policy optimization.''' def __init__(self, env_func, @@ -40,8 +40,11 @@ def __init__(self, use_gpu=False, seed=0, **kwargs): + self.filter_train_actions = False + self.penalize_sf_diff = False + self.sf_penalty = 1 + self.use_safe_reset = False super().__init__(env_func, training, checkpoint_path, output_dir, use_gpu, seed, **kwargs) - # Task. if self.training: # Training and testing. @@ -49,6 +52,7 @@ def __init__(self, self.env = VecRecordEpisodeStatistics(self.env, self.deque_size) self.eval_env = env_func(seed=seed * 111) self.eval_env = RecordEpisodeStatistics(self.eval_env, self.deque_size) + self.model = self.get_prior(self.eval_env, self.prior_info) else: # Testing only. self.env = env_func() @@ -84,8 +88,11 @@ def __init__(self, use_tensorboard = False self.logger = ExperimentLogger(output_dir, log_file_out=log_file_out, use_tensorboard=use_tensorboard) + # Adding safety filter + self.safety_filter = None + def reset(self): - """Do initializations for training or evaluation.""" + '''Do initializations for training or evaluation.''' if self.training: # set up stats tracking self.env.add_tracker('constraint_violation', 0) @@ -94,7 +101,9 @@ def reset(self): self.eval_env.add_tracker('mse', 0, mode='queue') self.total_steps = 0 - obs, _ = self.env.reset() + obs, info = self.env_reset(self.env, self.use_safe_reset) + self.info = info['n'][0] + self.true_obs = obs self.obs = self.obs_normalizer(obs) else: # Add episodic stats to be tracked. @@ -103,16 +112,16 @@ def reset(self): self.env.add_tracker('mse', 0, mode='queue') def close(self): - """Shuts down and cleans up lingering resources.""" + '''Shuts down and cleans up lingering resources.''' self.env.close() if self.training: self.eval_env.close() self.logger.close() def save(self, - path + path, ): - """Saves model params and experiment state to checkpoint path.""" + '''Saves model params and experiment state to checkpoint path.''' path_dir = os.path.dirname(path) os.makedirs(path_dir, exist_ok=True) state_dict = { @@ -131,10 +140,10 @@ def save(self, torch.save(state_dict, path) def load(self, - path + path, ): - """Restores model and experiment given checkpoint path.""" - state = torch.load(path) + '''Restores model and experiment given checkpoint path.''' + state = torch.load(path, map_location=torch.device('cpu')) # Restore policy. 
self.agent.load_state_dict(state['agent']) self.obs_normalizer.load_state_dict(state['obs_normalizer']) @@ -151,45 +160,26 @@ def learn(self, env=None, **kwargs ): - """Performs learning (pre-training, training, fine-tuning, etc.).""" - - # Initial Evaluation. - eval_results = self.run(env=self.eval_env, n_episodes=self.eval_batch_size) - self.logger.info('Eval | ep_lengths {:.2f} +/- {:.2f} | ep_return {:.3f} +/- {:.3f}'.format( - eval_results['ep_lengths'].mean(), - eval_results['ep_lengths'].std(), - eval_results['ep_returns'].mean(), - eval_results['ep_returns'].std())) - - if self.num_checkpoints > 0: - step_interval = np.linspace(0, self.max_env_steps, self.num_checkpoints) - interval_save = np.zeros_like(step_interval, dtype=bool) + '''Performs learning (pre-training, training, fine-tuning, etc).''' while self.total_steps < self.max_env_steps: results = self.train_step() # Checkpoint. - if (self.total_steps >= self.max_env_steps - or (self.save_interval and self.total_steps % self.save_interval == 0)): + if self.total_steps >= self.max_env_steps or (self.save_interval and self.total_steps % self.save_interval == 0): # Latest/final checkpoint. self.save(self.checkpoint_path) self.logger.info(f'Checkpoint | {self.checkpoint_path}') - path = os.path.join(self.output_dir, 'checkpoints', 'model_{}.pt'.format(self.total_steps)) + if self.num_checkpoints and self.total_steps % (self.max_env_steps // self.num_checkpoints) == 0: + # Intermediate checkpoint. + path = os.path.join(self.output_dir, 'checkpoints', f'model_{self.total_steps}.pt') self.save(path) - if self.num_checkpoints > 0: - interval_id = np.argmin(np.abs(np.array(step_interval) - self.total_steps)) - if interval_save[interval_id] is False: - # Intermediate checkpoint. - path = os.path.join(self.output_dir, 'checkpoints', f'model_{self.total_steps}.pt') - self.save(path) - interval_save[interval_id] = True # Evaluation. if self.eval_interval and self.total_steps % self.eval_interval == 0: eval_results = self.run(env=self.eval_env, n_episodes=self.eval_batch_size) results['eval'] = eval_results - self.logger.info('Eval | ep_lengths {:.2f} +/- {:.2f} | ep_return {:.3f} +/- {:.3f}'.format( - eval_results['ep_lengths'].mean(), - eval_results['ep_lengths'].std(), - eval_results['ep_returns'].mean(), - eval_results['ep_returns'].std())) + self.logger.info('Eval | ep_lengths {:.2f} +/- {:.2f} | ep_return {:.3f} +/- {:.3f}'.format(eval_results['ep_lengths'].mean(), + eval_results['ep_lengths'].std(), + eval_results['ep_returns'].mean(), + eval_results['ep_returns'].std())) # Save best model. eval_score = eval_results['ep_returns'].mean() eval_best_score = getattr(self, 'eval_best_score', -np.infty) @@ -201,7 +191,7 @@ def learn(self, self.log_step(results) def select_action(self, obs, info=None): - """Determine the action to take at the current timestep. + '''Determine the action to take at the current timestep. Args: obs (ndarray): The observation at this timestep. @@ -209,66 +199,21 @@ def select_action(self, obs, info=None): Returns: action (ndarray): The action chosen by the controller. 
- """ + ''' with torch.no_grad(): obs = torch.FloatTensor(obs).to(self.device) action = self.agent.ac.act(obs) - return action - def train_step(self): - """Performs a training/fine-tuning step.""" - self.agent.train() - self.obs_normalizer.unset_read_only() - rollouts = PPOBuffer(self.env.observation_space, self.env.action_space, self.rollout_steps, self.rollout_batch_size) - obs = self.obs - start = time.time() - for _ in range(self.rollout_steps): - with torch.no_grad(): - act, v, logp = self.agent.ac.step(torch.FloatTensor(obs).to(self.device)) - next_obs, rew, done, info = self.env.step(act) - next_obs = self.obs_normalizer(next_obs) - rew = self.reward_normalizer(rew, done) - mask = 1 - done.astype(float) - # Time truncation is not the same as true termination. - terminal_v = np.zeros_like(v) - for idx, inf in enumerate(info['n']): - if 'terminal_info' not in inf: - continue - inff = inf['terminal_info'] - if 'TimeLimit.truncated' in inff and inff['TimeLimit.truncated']: - terminal_obs = inf['terminal_observation'] - terminal_obs_tensor = torch.FloatTensor(terminal_obs).unsqueeze(0).to(self.device) - terminal_val = self.agent.ac.critic(terminal_obs_tensor).squeeze().detach().cpu().numpy() - terminal_v[idx] = terminal_val - rollouts.push({'obs': obs, 'act': act, 'rew': rew, 'mask': mask, 'v': v, 'logp': logp, 'terminal_v': terminal_v}) - obs = next_obs - self.obs = obs - self.total_steps += self.rollout_batch_size * self.rollout_steps - # Learn from rollout batch. - last_val = self.agent.ac.critic(torch.FloatTensor(obs).to(self.device)).detach().cpu().numpy() - ret, adv = compute_returns_and_advantages(rollouts.rew, - rollouts.v, - rollouts.mask, - rollouts.terminal_v, - last_val, - gamma=self.gamma, - use_gae=self.use_gae, - gae_lambda=self.gae_lambda) - rollouts.ret = ret - # Prevent divide-by-0 for repetitive tasks. 
- rollouts.adv = (adv - adv.mean()) / (adv.std() + 1e-6) - results = self.agent.update(rollouts, self.device) - results.update({'step': self.total_steps, 'elapsed_time': time.time() - start}) - return results + return action def run(self, env=None, render=False, - n_episodes=1, + n_episodes=10, verbose=False, ): - """Runs evaluation with current policy.""" + '''Runs evaluation with current policy.''' self.agent.eval() self.obs_normalizer.set_read_only() if env is None: @@ -281,15 +226,30 @@ def run(self, env.add_tracker('constraint_values', 0, mode='queue') env.add_tracker('mse', 0, mode='queue') - obs, info = env.reset() + obs, info = self.env_reset(env, True) + true_obs = obs obs = self.obs_normalizer(obs) ep_returns, ep_lengths = [], [] frames = [] - mse, ep_rmse = [], [] + total_return = 0 + start = time.time() while len(ep_returns) < n_episodes: action = self.select_action(obs=obs, info=info) - obs, _, done, info = env.step(action) - mse.append(info["mse"]) + + # Adding safety filter + success = False + physical_action = env.denormalize_action(action) + unextended_obs = np.squeeze(true_obs)[:env.symbolic.nx] + certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) + if success: + action = env.normalize_action(certified_action) + else: + self.safety_filter.ocp_solver.reset() + + action = np.atleast_2d(np.squeeze([action])) + obs, rew, done, info = env.step(action) + total_return += rew + if render: env.render() frames.append(env.render('rgb_array')) @@ -297,18 +257,20 @@ def run(self, print(f'obs {obs} | act {action}') if done: assert 'episode' in info - ep_rmse.append(np.array(mse).mean()**0.5) - mse = [] - ep_returns.append(info['episode']['r']) + ep_returns.append(total_return) ep_lengths.append(info['episode']['l']) - obs, _ = env.reset() + obs, info = self.env_reset(env, True) + total_return = 0 + true_obs = obs obs = self.obs_normalizer(obs) # Collect evaluation results. ep_lengths = np.asarray(ep_lengths) ep_returns = np.asarray(ep_returns) - eval_results = {'ep_returns': ep_returns, 'ep_lengths': ep_lengths, - 'rmse': np.array(ep_rmse).mean(), - 'rmse_std': np.array(ep_rmse).std()} + eval_results = { + 'ep_returns': ep_returns, + 'ep_lengths': ep_lengths, + 'elapsed_time': time.time() - start + } if len(frames) > 0: eval_results['frames'] = frames # Other episodic stats from evaluation env. 
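The safety-filter splice in run() above follows a certify-or-fallback pattern: map the normalized policy action to physical units, ask the filter to certify it, then either execute the certified action (mapped back) or reset the filter's OCP solver and keep the raw action. A condensed sketch of that single step, with env, safety_filter, and true_obs assumed to be set up as in the surrounding code:

import numpy as np

def certified_step(env, safety_filter, action, true_obs, info):
    physical_action = env.denormalize_action(action)
    unextended_obs = np.squeeze(true_obs)[:env.symbolic.nx]
    certified_action, success = safety_filter.certify_action(unextended_obs, physical_action, info)
    if success:
        # Execute the certified action, mapped back into the normalized space.
        return env.normalize_action(certified_action)
    # Infeasible solve: reset the OCP solver and fall back to the raw action.
    safety_filter.ocp_solver.reset()
    return action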
@@ -317,20 +279,95 @@ def run(self, eval_results.update(queued_stats) return eval_results + def train_step(self): + '''Performs a training/fine-tuning step.''' + self.agent.train() + self.obs_normalizer.unset_read_only() + rollouts = PPOBuffer(self.env.observation_space, self.env.action_space, self.rollout_steps, self.rollout_batch_size) + obs = self.obs + true_obs = self.true_obs + info = self.info + start = time.time() + for _ in range(self.rollout_steps): + with torch.no_grad(): + action, v, logp = self.agent.ac.step(torch.FloatTensor(obs).to(self.device)) + unsafe_action = action + + # Adding safety filter + success = False + if self.safety_filter is not None and (self.filter_train_actions is True or self.penalize_sf_diff is True): + physical_action = self.env.envs[0].denormalize_action(action) + unextended_obs = np.squeeze(true_obs)[:self.env.envs[0].symbolic.nx] + certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) + if success and self.filter_train_actions is True: + action = self.env.envs[0].normalize_action(certified_action) + else: + self.safety_filter.ocp_solver.reset() + + action = np.atleast_2d(np.squeeze([action])) + next_obs, rew, done, info = self.env.step(action) + if done[0] and self.use_safe_reset: + next_obs, info = self.env_reset(self.env, self.use_safe_reset) + if self.penalize_sf_diff and success: + rew = np.log(rew) + rew -= self.sf_penalty * np.linalg.norm(physical_action - certified_action) + rew = np.exp(rew) + next_true_obs = next_obs + next_obs = self.obs_normalizer(next_obs) + rew = self.reward_normalizer(rew, done) + mask = 1 - done.astype(float) + # Time truncation is not the same as true termination. + terminal_v = np.zeros_like(v) + for idx, inf in enumerate(info['n']): + if 'terminal_info' not in inf: + continue + inff = inf['terminal_info'] + if 'TimeLimit.truncated' in inff and inff['TimeLimit.truncated']: + terminal_obs = inf['terminal_observation'] + terminal_obs_tensor = torch.FloatTensor(terminal_obs).unsqueeze(0).to(self.device) + terminal_val = self.agent.ac.critic(terminal_obs_tensor).squeeze().detach().cpu().numpy() + terminal_v[idx] = terminal_val + + rollouts.push({'obs': obs, 'act': unsafe_action, 'rew': rew, 'mask': mask, 'v': v, 'logp': logp, 'terminal_v': terminal_v}) + obs = next_obs + true_obs = next_true_obs + info = info['n'][0] + self.obs = obs + self.true_obs = true_obs + self.info = info + self.total_steps += self.rollout_batch_size * self.rollout_steps + # Learn from rollout batch. + last_val = self.agent.ac.critic(torch.FloatTensor(obs).to(self.device)).detach().cpu().numpy() + ret, adv = compute_returns_and_advantages(rollouts.rew, + rollouts.v, + rollouts.mask, + rollouts.terminal_v, + last_val, + gamma=self.gamma, + use_gae=self.use_gae, + gae_lambda=self.gae_lambda) + rollouts.ret = ret + # Prevent divide-by-0 for repetitive tasks. + rollouts.adv = (adv - adv.mean()) / (adv.std() + 1e-6) + results = self.agent.update(rollouts, self.device) + results.update({'step': self.total_steps, 'elapsed_time': time.time() - start}) + return results + def log_step(self, results ): - """Does logging after a training step.""" + '''Does logging after a training step.''' step = results['step'] # runner stats self.logger.add_scalars( { 'step': step, - 'step_time': results['elapsed_time'], - 'progress': step / self.max_env_steps + 'progress': step / self.max_env_steps, }, step, - prefix='time') + prefix='time', + write=False, + write_tb=False) # Learning stats. 
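The filtered-action penalty in train_step above is applied in log space, which for a positive reward is equivalent to a multiplicative discount: exp(log(r) - c * ||u_phys - u_cert||) == r * exp(-c * ||u_phys - u_cert||). A numeric sketch (assumes rew > 0; values are hypothetical):

import numpy as np

rew = np.array([0.9])
sf_penalty = 0.1
physical_action = np.array([0.30, 0.02])
certified_action = np.array([0.28, 0.00])

diff = np.linalg.norm(physical_action - certified_action)
penalized = np.exp(np.log(rew) - sf_penalty * diff)
assert np.allclose(penalized, rew * np.exp(-sf_penalty * diff))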
self.logger.add_scalars( { @@ -347,9 +384,9 @@ def log_step(self, { 'ep_length': ep_lengths.mean(), 'ep_return': ep_returns.mean(), - 'ep_return_std': ep_returns.std(), 'ep_reward': (ep_returns / ep_lengths).mean(), - 'ep_constraint_violation': ep_constraint_violation.mean() + 'ep_constraint_violation': ep_constraint_violation.mean(), + 'step_time': results['elapsed_time'], }, step, prefix='stat') @@ -360,19 +397,46 @@ def log_step(self, eval_ep_lengths = results['eval']['ep_lengths'] eval_ep_returns = results['eval']['ep_returns'] eval_constraint_violation = results['eval']['constraint_violation'] - eval_rmse = results['eval']['rmse'] - eval_rmse_std = results['eval']['rmse_std'] + eval_mse = results['eval']['mse'] self.logger.add_scalars( { 'ep_length': eval_ep_lengths.mean(), 'ep_return': eval_ep_returns.mean(), - 'ep_return_std': eval_ep_returns.std(), 'ep_reward': (eval_ep_returns / eval_ep_lengths).mean(), 'constraint_violation': eval_constraint_violation.mean(), - 'rmse': eval_rmse, - 'rmse_std': eval_rmse_std + 'mse': eval_mse.mean(), + 'step_time': results['eval']['elapsed_time'], }, step, prefix='stat_eval') # Print summary table self.logger.dump_scalars() + + def env_reset(self, env, use_safe_reset): + '''Resets the environment until a feasible initial state is found. + + Args: + env (BenchmarkEnv): The environment that is being reset. + use_safe_reset (bool): Whether to safely reset the system using the SF. + + Returns: + obs (ndarray): The initial observation. + info (dict): The initial info. + ''' + success = False + action = self.model.U_EQ + obs, info = env.reset() + if self.safety_filter is not None: + self.safety_filter.reset_before_run() + + if use_safe_reset is True and self.safety_filter is not None: + while success is not True or np.any(self.safety_filter.slack_prev > 1e-4): + obs, info = env.reset() + info['current_step'] = 1 + unextended_obs = np.squeeze(obs)[:self.env.envs[0].symbolic.nx] + self.safety_filter.reset_before_run() + _, success = self.safety_filter.certify_action(unextended_obs, action, info) + if not success: + self.safety_filter.ocp_solver.reset() + + return obs, info diff --git a/safe_control_gym/envs/constraints.py b/safe_control_gym/envs/constraints.py index d7ff6fadd..3c98aaedd 100644 --- a/safe_control_gym/envs/constraints.py +++ b/safe_control_gym/envs/constraints.py @@ -106,7 +106,7 @@ def get_value(self, value (ndarray): The evaluation of the constraint. 
''' env_value = self.get_env_constraint_var(env) - return np.round_(np.atleast_1d(np.squeeze(self.sym_func(np.array(env_value, ndmin=1)))), decimals=self.decimals) + return np.round(np.atleast_1d(np.squeeze(self.sym_func(np.array(env_value, ndmin=1)))), decimals=self.decimals) def is_violated(self, env, @@ -443,7 +443,7 @@ def __init__(self, self.num_constraints = self.bound.shape[0] def get_value(self, env): - c_value = np.round_(np.abs(self.constraint_filter @ env.state) - self.bound, decimals=self.decimals) + c_value = np.round(np.abs(self.constraint_filter @ env.state) - self.bound, decimals=self.decimals) return c_value # TODO: temp addition diff --git a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py index 1d6af8781..802e8b4b0 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py +++ b/safe_control_gym/envs/gym_pybullet_drones/base_aviary.py @@ -14,11 +14,10 @@ from datetime import datetime from enum import Enum +import casadi as cs import numpy as np import pybullet as p import pybullet_data -import casadi as cs -from termcolor import colored from safe_control_gym.envs.benchmark_env import BenchmarkEnv from safe_control_gym.math_and_models.transformations import csRotXYZ, get_angularvelocity_rpy @@ -94,26 +93,26 @@ def __init__(self, self.RECORD = record # Load the drone properties from the .urdf file. self.MASS, \ - self.L, \ - self.THRUST2WEIGHT_RATIO, \ - self.J, \ - self.J_INV, \ - self.KF, \ - self.KM, \ - self.COLLISION_H, \ - self.COLLISION_R, \ - self.COLLISION_Z_OFFSET, \ - self.MAX_SPEED_KMH, \ - self.GND_EFF_COEFF, \ - self.PROP_RADIUS, \ - self.DRAG_COEFF, \ - self.DW_COEFF_1, \ - self.DW_COEFF_2, \ - self.DW_COEFF_3, \ - self.PWM2RPM_SCALE, \ - self.PWM2RPM_CONST, \ - self.MIN_PWM, \ - self.MAX_PWM = self._parse_urdf_parameters(self.URDF_PATH) + self.L, \ + self.THRUST2WEIGHT_RATIO, \ + self.J, \ + self.J_INV, \ + self.KF, \ + self.KM, \ + self.COLLISION_H, \ + self.COLLISION_R, \ + self.COLLISION_Z_OFFSET, \ + self.MAX_SPEED_KMH, \ + self.GND_EFF_COEFF, \ + self.PROP_RADIUS, \ + self.DRAG_COEFF, \ + self.DW_COEFF_1, \ + self.DW_COEFF_2, \ + self.DW_COEFF_3, \ + self.PWM2RPM_SCALE, \ + self.PWM2RPM_CONST, \ + self.MIN_PWM, \ + self.MAX_PWM = self._parse_urdf_parameters(self.URDF_PATH) self.GROUND_PLANE_Z = -0.05 if verbose: print( @@ -270,8 +269,6 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): `_preprocess_action()` in each subclass. disturbance_force (ndarray, optional): Disturbance force, applied to all drones. ''' - time_before_stepping = time.time() - # clipped_action = np.reshape(clipped_action, (self.NUM_DRONES, 4)) clipped_action = np.expand_dims(clipped_action, axis=0) # Repeat for as many as the aggregate physics steps. @@ -280,20 +277,20 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): # Between aggregate steps for certain types of update. if self.PYB_STEPS_PER_CTRL > 1 and self.PHYSICS in [ Physics.DYN, Physics.PYB_GND, Physics.PYB_DRAG, - Physics.PYB_DW, Physics.PYB_GND_DRAG_DW, Physics.RK4 #, Physics.DYN_2D + Physics.PYB_DW, Physics.PYB_GND_DRAG_DW, Physics.RK4 # , Physics.DYN_2D ]: self._update_and_store_kinematic_information() # Step the simulation using the desired physics update. 
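On the constraints.py hunk above: np.round_ was deprecated in NumPy 1.25 and removed in NumPy 2.0, so the rename to np.round (identical semantics) keeps constraint evaluation working on current NumPy, e.g.:

import numpy as np

# np.round is a drop-in replacement for the removed np.round_.
c_value = np.round(np.atleast_1d(np.squeeze([0.12345, -0.6789])), decimals=2)
# -> array([ 0.12, -0.68])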
for i in range(self.NUM_DRONES): - rpm = self._preprocess_control(clipped_action[i, :]) + executable_action = self._preprocess_control(clipped_action[i, :]) if self.PHYSICS == Physics.PYB: - self._physics(rpm, i) + self._physics(executable_action, i) elif self.PHYSICS == Physics.DYN: self._dynamics(clipped_action[i, :], i) elif self.PHYSICS == Physics.DYN_2D: - self._dynamics_2d(rpm, i) + self._dynamics_2d(executable_action, i) elif self.PHYSICS == Physics.DYN_SI: - self._dynamics_si(clipped_action[i, :], i, disturbance_force) + self._dynamics_si(executable_action, i, disturbance_force) elif self.PHYSICS == Physics.RK4: self._dynamics_rk4(clipped_action[i, :], i) elif self.PHYSICS == Physics.DYN_SI_3D: @@ -316,9 +313,9 @@ def _advance_simulation(self, clipped_action, disturbance_force=None): if disturbance_force is not None: pos = self._get_drone_state_vector(i)[:3] ''' - NOTE: applyExternalForce only works when explicitly + NOTE: applyExternalForce only works when explicitly stepping the simulation with p.stepSimulation(). - Therefore, + Therefore, ''' p.applyExternalForce( self.DRONE_IDS[i], @@ -484,7 +481,7 @@ def _ground_effect(self, rpm, nth_drone): ]) prop_heights = np.clip(prop_heights, self.GND_EFF_H_CLIP, np.inf) gnd_effects = np.array(rpm ** 2) * self.KF * self.GND_EFF_COEFF \ - * (self.PROP_RADIUS / (4 * prop_heights)) ** 2 + * (self.PROP_RADIUS / (4 * prop_heights)) ** 2 if np.abs(self.rpy[nth_drone, 0]) < np.pi / 2 and np.abs( self.rpy[nth_drone, 1]) < np.pi / 2: for i in range(4): @@ -642,7 +639,6 @@ def _dynamics_rk4(self, rpm, nth_drone): self.rpy_rates[nth_drone, :] = rpy_rates def setup_rk4_dynamics_expression(self): - nx, nu = 12, 4 gamma = self.KM / self.KF z = cs.MX.sym('z') z_dot = cs.MX.sym('z_dot') @@ -659,7 +655,7 @@ def setup_rk4_dynamics_expression(self): # PyBullet Euler angles use the SDFormat for rotation matrices. Rob = csRotXYZ(phi, theta, psi) # rotation matrix transforming a vector in the body frame to the world frame. - # Define state variables. + # Define state variables. X = cs.vertcat(x, x_dot, y, y_dot, z, z_dot, phi, theta, psi, p_body, q_body, r_body) # Define inputs. @@ -682,14 +678,14 @@ def setup_rk4_dynamics_expression(self): self.L / cs.sqrt(2.0) * (-f1 + f2 + f3 - f4), gamma * (-f1 + f2 - f3 + f4)) rate_dot = self.J_INV @ ( - Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) + Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) ang_dot = cs.blockcat([[1, cs.sin(phi) * cs.tan(theta), cs.cos(phi) * cs.tan(theta)], [0, cs.cos(phi), -cs.sin(phi)], [0, cs.sin(phi) / cs.cos(theta), cs.cos(phi) / cs.cos(theta)]]) @ cs.vertcat(p_body, q_body, r_body) X_dot = cs.vertcat(pos_dot[0], pos_ddot[0], pos_dot[1], pos_ddot[1], pos_dot[2], pos_ddot[2], ang_dot, rate_dot) - self.X_dot_fun = cs.Function("X_dot", [X, U], [X_dot]) + self.X_dot_fun = cs.Function('X_dot', [X, U], [X_dot]) self.fd_func = cs.integrator('fd', 'rk', {'x': X, 'p': U, 'ode': X_dot}, {'tf': self.PYB_TIMESTEP}) @@ -708,7 +704,6 @@ def _dynamics_2d(self, rpm, nth_drone): rpy = self.rpy[nth_drone, :] vel = self.vel[nth_drone, :] ang_v = self.ang_v[nth_drone, :] - rpy_rates = self.rpy_rates[nth_drone, :] # rotation = np.array(p.getMatrixFromQuaternion(quat)).reshape(3, 3) # Compute forces and torques. 
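setup_rk4_dynamics_expression above follows the usual CasADi recipe: build the state derivative symbolically, wrap it in a cs.Function for cheap repeated evaluation, and hand the same ODE to a fixed-step 'rk' integrator for the discrete-time update. A self-contained toy instance of the same pattern (double integrator with drag; the 0.02 s timestep is hypothetical):

import casadi as cs

x = cs.MX.sym('x', 2)
u = cs.MX.sym('u', 1)
x_dot = cs.vertcat(x[1], u[0] - 0.1 * x[1])

X_dot_fun = cs.Function('X_dot', [x, u], [x_dot])  # continuous-time derivative
fd = cs.integrator('fd', 'rk', {'x': x, 'p': u, 'ode': x_dot}, {'tf': 0.02})

x_next = fd(x0=[0.0, 1.0], p=[0.0])['xf']  # one 20 ms fixed-step RK update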
@@ -721,7 +716,7 @@ def _dynamics_2d(self, rpm, nth_drone): # update state with RK4 # next_state = self.fd_func(x0=state, p=input)['xf'].full()[:, 0] X_dot = self.X_dot_fun(state, action).full()[:, 0] - next_state = state + X_dot*self.PYB_TIMESTEP + next_state = state + X_dot * self.PYB_TIMESTEP # Updated information pos = np.array([next_state[0], 0, next_state[4]]) @@ -787,13 +782,13 @@ def setup_dynamics_2d_expression(self): self.L / cs.sqrt(2.0) * (-f1 + f2 + f3 - f4), gamma * (-f1 + f2 - f3 + f4)) rate_dot = self.J_INV @ ( - Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) + Mb - (cs.skew(cs.vertcat(p_body, q_body, r_body)) @ self.J @ cs.vertcat(p_body, q_body, r_body))) ang_dot = (cs.blockcat([[1, cs.sin(phi) * cs.tan(theta), cs.cos(phi) * cs.tan(theta)], [0, cs.cos(phi), -cs.sin(phi)], [0, cs.sin(phi) / cs.cos(theta), cs.cos(phi) / cs.cos(theta)]]) @ cs.vertcat(p_body, q_body, r_body)) X_dot = cs.vertcat(pos_dot[0], pos_ddot[0], pos_dot[1], pos_ddot[1], pos_dot[2], pos_ddot[2], ang_dot, rate_dot) - self.X_dot_fun = cs.Function("X_dot", [X, U], [X_dot]) + self.X_dot_fun = cs.Function('X_dot', [X, U], [X_dot]) def _dynamics_si(self, action, nth_drone, disturbance_force=None): '''Explicit dynamics implementation from the identified model. @@ -810,7 +805,6 @@ def _dynamics_si(self, action, nth_drone, disturbance_force=None): # quat = self.quat[nth_drone, :] rpy = self.rpy[nth_drone, :] vel = self.vel[nth_drone, :] - ang_v = self.ang_v[nth_drone, :] rpy_rates = self.rpy_rates[nth_drone, :] # Compute forces and torques. @@ -820,7 +814,7 @@ def _dynamics_si(self, action, nth_drone, disturbance_force=None): # update state if disturbance_force is not None: d = np.array([disturbance_force[0], disturbance_force[2]]) - else: + else: d = np.array([0, 0]) # perform euler integration # next_state = state + self.PYB_TIMESTEP * self.X_dot_fun(state, action, d).full()[:, 0] @@ -829,7 +823,7 @@ def _dynamics_si(self, action, nth_drone, disturbance_force=None): k2 = self.X_dot_fun(state + 0.5 * self.PYB_TIMESTEP * k1, action, d).full()[:, 0] k3 = self.X_dot_fun(state + 0.5 * self.PYB_TIMESTEP * k2, action, d).full()[:, 0] k4 = self.X_dot_fun(state + self.PYB_TIMESTEP * k3, action, d).full()[:, 0] - next_state = state + (self.PYB_TIMESTEP / 6) * (k1 + 2*k2 + 2*k3 + k4) + next_state = state + (self.PYB_TIMESTEP / 6) * (k1 + 2 * k2 + 2 * k3 + k4) # Updated information pos = np.array([next_state[0], 0, next_state[2]]) @@ -853,19 +847,19 @@ def setup_dynamics_si_expression(self): theta_dot = cs.MX.sym('theta_dot') # Pitch X = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot) g = self.GRAVITY_ACC - d = cs.MX.sym('d', 2, 1) # disturbance force + d = cs.MX.sym('d', 2, 1) # disturbance force # Define inputs. 
-        T = cs.MX.sym('T')  # normlized thrust [N]
+        T = cs.MX.sym('T')  # normalized thrust [N]
         P = cs.MX.sym('P')  # desired pitch angle [rad]
         U = cs.vertcat(T, P)
         X_dot = cs.vertcat(x_dot,
-                           (18.112984649321753 * T+ 3.6800) * cs.sin(theta) + -0.008 + d[0] / self.MASS,
-                           z_dot,
-                           (18.112984649321753 * T + 3.6800) * cs.cos(theta) - g + d[1] / self.MASS,
-                           theta_dot,
-                           -140.8 * theta - 13.4 * theta_dot + 124.8 * P)
-        self.X_dot_fun = cs.Function("X_dot", [X, U, d], [X_dot])
+                           (18.112984649321753 * T + 3.6800) * cs.sin(theta) + -0.008 + d[0] / self.MASS,
+                           z_dot,
+                           (18.112984649321753 * T + 3.6800) * cs.cos(theta) - g + d[1] / self.MASS,
+                           theta_dot,
+                           -140.8 * theta - 13.4 * theta_dot + 124.8 * P)
+        self.X_dot_fun = cs.Function('X_dot', [X, U, d], [X_dot])

     def _dynamics_si_3d(self, action, nth_drone, disturbance_force=None):
         '''Explicit dynamics implementation from the identified model.
@@ -882,7 +876,6 @@ def _dynamics_si_3d(self, action, nth_drone, disturbance_force=None):
         rpy = self.rpy[nth_drone, :]
         vel = self.vel[nth_drone, :]
         ang_v = self.ang_v[nth_drone, :]
-        rpy_rates = self.rpy_rates[nth_drone, :]

         # Compute forces and torques.
         # Update state with discrete time dynamics.
@@ -961,21 +954,20 @@ def setup_dynamics_si_3d_expression(self):
         #                    - 61.62863740616216 * theta - 7.205874472066235 * theta_dot + 51.90335491067372 * P,
         #                    (- 12.544174350349687 * psi - 0.012945379372787613 * psi_dot + 43.839961280232046 * Y)*0)
-        X_dot = cs.vertcat(x_dot,
-                           (18.112984649321753 * T + 3.6800) * (cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)) + d[0] / self.MASS,
-                           y_dot,
-                           0.0,
-                           z_dot,
-                           (18.112984649321753 * T + 3.6800) * cs.cos(phi) * cs.cos(theta) - g + d[1] / self.MASS,
-                           phi_dot,
-                           theta_dot,
-                           psi_dot,
-                           -140.8 * phi - 13.4 * phi_dot + 124.8 * R,
-                           -140.8 * theta - 13.4 * theta_dot + 124.8 * P,
-                           0.0)
-
-        self.X_dot_fun = cs.Function("X_dot", [X, U, d], [X_dot])
+                           (18.112984649321753 * T + 3.6800) * (cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)) + d[0] / self.MASS,
+                           y_dot,
+                           0.0,
+                           z_dot,
+                           (18.112984649321753 * T + 3.6800) * cs.cos(phi) * cs.cos(theta) - g + d[1] / self.MASS,
+                           phi_dot,
+                           theta_dot,
+                           psi_dot,
+                           -140.8 * phi - 13.4 * phi_dot + 124.8 * R,
+                           -140.8 * theta - 13.4 * theta_dot + 124.8 * P,
+                           0.0)
+
+        self.X_dot_fun = cs.Function('X_dot', [X, U, d], [X_dot])

     def _show_drone_local_axes(self, nth_drone):
         '''Draws the local frame of the n-th drone in PyBullet's GUI.
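The constants hard-coded into setup_dynamics_si_expression above (18.112984649321753, 3.6800, -0.008, -140.8, -13.4, 124.8) are identified-model parameters; factoring them out reduces the 2D attitude dynamics to one small parameterized function. A numpy sketch (state [x, x_dot, z, z_dot, theta, theta_dot], inputs [T, P]; the beta/alpha grouping is illustrative):

import numpy as np

def si_dynamics_2d(x, u, d, mass, g=9.81,
                   beta=(18.112984649321753, 3.6800),
                   alpha=(-140.8, -13.4, 124.8)):
    _, x_dot, _, z_dot, theta, theta_dot = x
    T, P = u
    thrust = beta[0] * T + beta[1]  # identified thrust map
    return np.array([
        x_dot,
        thrust * np.sin(theta) - 0.008 + d[0] / mass,
        z_dot,
        thrust * np.cos(theta) - g + d[1] / mass,
        theta_dot,
        alpha[0] * theta + alpha[1] * theta_dot + alpha[2] * P,  # identified attitude loop
    ])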
@@ -1047,5 +1039,5 @@ def _parse_urdf_parameters(self, file_name): MIN_PWM = float(URDF_TREE[0].attrib['pwm_min']) MAX_PWM = float(URDF_TREE[0].attrib['pwm_max']) return M, L, THRUST2WEIGHT_RATIO, J, J_INV, KF, KM, COLLISION_H, COLLISION_R, COLLISION_Z_OFFSET, MAX_SPEED_KMH, \ - GND_EFF_COEFF, PROP_RADIUS, DRAG_COEFF, DW_COEFF_1, DW_COEFF_2, DW_COEFF_3, \ - PWM2RPM_SCALE, PWM2RPM_CONST, MIN_PWM, MAX_PWM \ No newline at end of file + GND_EFF_COEFF, PROP_RADIUS, DRAG_COEFF, DW_COEFF_1, DW_COEFF_2, DW_COEFF_3, \ + PWM2RPM_SCALE, PWM2RPM_CONST, MIN_PWM, MAX_PWM diff --git a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py index 231b053d7..f7cfcb0a9 100644 --- a/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py +++ b/safe_control_gym/envs/gym_pybullet_drones/quadrotor.py @@ -644,7 +644,7 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): z_dot, (beta_1 * T + beta_2) * cs.cos(theta + pitch_bias) - g, theta_dot, - alpha_1 * (theta + pitch_bias ) + alpha_2 * theta_dot + alpha_3 * P) + alpha_1 * (theta + pitch_bias) + alpha_2 * theta_dot + alpha_3 * P) # Define observation. Y = cs.vertcat(x, x_dot, z, z_dot, theta, theta_dot) elif self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S: @@ -749,10 +749,10 @@ def _setup_symbolic(self, prior_prop={}, **kwargs): # TODO: create a parameter for the new quad model X_dot = cs.vertcat(x_dot, (20.763637147006943 * T + 3.1610208881218727) * ( - cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)), + cs.cos(phi) * cs.sin(theta) * cs.cos(psi) + cs.sin(phi) * cs.sin(psi)), y_dot, (20.763637147006943 * T + 3.1610208881218727) * ( - cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), + cs.cos(phi) * cs.sin(theta) * cs.sin(psi) - cs.sin(phi) * cs.cos(psi)), z_dot, (20.763637147006943 * T + 3.1610208881218727) * cs.cos(phi) * cs.cos(theta) - g, phi_dot, @@ -869,6 +869,8 @@ def _set_action_space(self): self.hover_thrust = self.GRAVITY_ACC * self.MASS else: self.hover_thrust = self.GRAVITY_ACC * self.MASS / action_dim + self.action_scale = (self.physical_action_bounds[1] - self.physical_action_bounds[0]) / 2 + self.action_bias = (self.physical_action_bounds[1] + self.physical_action_bounds[0]) / 2 self.action_space = spaces.Box(low=-np.ones(action_dim), high=np.ones(action_dim), dtype=np.float32) @@ -998,10 +1000,10 @@ def _preprocess_control(self, action): # Identified dynamics model works with collective thrust and pitch directly # No need to compute RPMs, (save compute) self.current_clipped_action = np.clip(self.current_noisy_physical_action, - self.physical_action_bounds[0], - self.physical_action_bounds[1])[0] + self.action_space.low, + self.action_space.high) if self.PHYSICS == Physics.DYN_SI or self.PHYSICS == Physics.DYN_SI_3D: - return None + return self.current_clipped_action if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S: collective_thrust, pitch = self.current_clipped_action @@ -1036,10 +1038,11 @@ def normalize_action(self, action): normalized_action (ndarray): The action in the correct action space. 
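The action_scale/action_bias pair added to _set_action_space above makes (de)normalization a single affine map shared by every quad type, replacing the hover-thrust special cases in the normalize/denormalize hunks that follow: scale = (high - low) / 2 and bias = (high + low) / 2, so a_norm = (a_phys - bias) / scale lies in [-1, 1] exactly on the physical bounds. A round-trip sketch with hypothetical bounds:

import numpy as np

low, high = np.array([0.0, -0.7]), np.array([0.58212, 0.7])
action_scale = (high - low) / 2
action_bias = (high + low) / 2

a_phys = np.array([0.3, 0.1])
a_norm = (a_phys - action_bias) / action_scale
assert np.allclose(a_norm * action_scale + action_bias, a_phys)   # round trip
assert np.all(np.abs((high - action_bias) / action_scale) <= 1.0)  # bounds map to +/-1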
""" if self.NORMALIZED_RL_ACTION_SPACE: - if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S: - action = np.array([(action[0] / self.hover_thrust - 1) / self.norm_act_scale, action[1]]) - else: - action = (action / self.hover_thrust - 1) / self.norm_act_scale + # if self.QUAD_TYPE in [QuadType.TWO_D_ATTITUDE, QuadType.TWO_D_ATTITUDE_5S, QuadType.TWO_D_ATTITUDE_BODY]: + # action = np.array([(action[0] / self.hover_thrust - 1) / self.norm_act_scale, action[1]]) + # else: + # action = (action / self.hover_thrust - 1) / self.norm_act_scale + action = (action - self.action_bias) / self.action_scale return action @@ -1054,20 +1057,22 @@ def denormalize_action(self, action): """ if self.NORMALIZED_RL_ACTION_SPACE: - if self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE or self.QUAD_TYPE == QuadType.TWO_D_ATTITUDE_5S: - # # divided by 4 as action[0] is a collective thrust - # thrust = action[0] / 4 - # hover_pwm = (self.HOVER_RPM - self.PWM2RPM_CONST) / self.PWM2RPM_SCALE - # thrust = np.where(thrust <= 0, self.MIN_PWM + (thrust + 1) * (hover_pwm - self.MIN_PWM), - # hover_pwm + (self.MAX_PWM - hover_pwm) * thrust) - - thrust = (1 + self.norm_act_scale * action[0]) * self.hover_thrust - # thrust = self.attitude_control.thrust2pwm(thrust) - # thrust = self.HOVER_RPM * (1+0.05*action[0]) - - action = np.array([thrust, action[1]]) - else: - action = (1 + self.norm_act_scale * action) * self.hover_thrust + # if self.QUAD_TYPE in [QuadType.TWO_D_ATTITUDE, QuadType.TWO_D_ATTITUDE_5S, QuadType.TWO_D_ATTITUDE_BODY]: + # # # divided by 4 as action[0] is a collective thrust + # # thrust = action[0] / 4 + # # hover_pwm = (self.HOVER_RPM - self.PWM2RPM_CONST) / self.PWM2RPM_SCALE + # # thrust = np.where(thrust <= 0, self.MIN_PWM + (thrust + 1) * (hover_pwm - self.MIN_PWM), + # # hover_pwm + (self.MAX_PWM - hover_pwm) * thrust) + # + # thrust = (1 + self.norm_act_scale * action[0]) * self.hover_thrust + # # thrust = self.attitude_control.thrust2pwm(thrust) + # # thrust = self.HOVER_RPM * (1+0.05*action[0]) + # + # action = np.array([thrust, action[1]]) + # else: + # action = (1 + self.norm_act_scale * action) * self.hover_thrust + action = action * self.action_scale + self.action_bias + # action = np.clip(action, self.action_space.low, self.action_space.high) return action @@ -1245,9 +1250,9 @@ def _get_info(self): # Filter only relevant dimensions. 
state_error = state_error * self.info_mse_metric_state_weight info['mse'] = np.sum(state_error ** 2) - # if self.constraints is not None: - # info['constraint_values'] = self.constraints.get_values(self) - # info['constraint_violations'] = self.constraints.get_violations(self) + if self.constraints is not None: + info['constraint_values'] = self.constraints.get_values(self) + info['constraint_violations'] = self.constraints.get_violations(self) return info def _get_reset_info(self): diff --git a/safe_control_gym/safety_filters/__init__.py b/safe_control_gym/safety_filters/__init__.py index 2d53e550f..b82a4b201 100644 --- a/safe_control_gym/safety_filters/__init__.py +++ b/safe_control_gym/safety_filters/__init__.py @@ -6,6 +6,14 @@ entry_point='safe_control_gym.safety_filters.mpsc.linear_mpsc:LINEAR_MPSC', config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml') +register(idx='nl_mpsc', + entry_point='safe_control_gym.safety_filters.mpsc.nl_mpsc:NL_MPSC', + config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml') + +register(idx='mpsc_acados', + entry_point='safe_control_gym.safety_filters.mpsc.mpsc_acados:MPSC_ACADOS', + config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml') + register(idx='cbf', entry_point='safe_control_gym.safety_filters.cbf.cbf:CBF', config_entry_point='safe_control_gym.safety_filters.cbf:cbf.yaml') diff --git a/safe_control_gym/safety_filters/mpsc/linear_mpsc.py b/safe_control_gym/safety_filters/mpsc/linear_mpsc.py index 4f095a89e..2d9536744 100644 --- a/safe_control_gym/safety_filters/mpsc/linear_mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/linear_mpsc.py @@ -18,7 +18,8 @@ from safe_control_gym.controllers.lqr.lqr_utils import discretize_linear_system from safe_control_gym.controllers.mpc.mpc_utils import rk_discrete from safe_control_gym.envs.benchmark_env import Environment, Task -from safe_control_gym.envs.constraints import ConstrainedVariableType, LinearConstraint, QuadraticContstraint +from safe_control_gym.envs.constraints import (BoundedConstraint, ConstrainedVariableType, LinearConstraint, + QuadraticContstraint) from safe_control_gym.safety_filters.mpsc.mpsc import MPSC from safe_control_gym.safety_filters.mpsc.mpsc_utils import (Cost_Function, compute_RPI_set, ellipse_bounding_box, pontryagin_difference_AABB) @@ -30,8 +31,8 @@ class LINEAR_MPSC(MPSC): def __init__(self, env_func, horizon: int = 10, - q_lin: list = None, - r_lin: list = None, + q_mpc: list = None, + r_mpc: list = None, integration_algo: str = 'rk4', n_samples: int = 600, n_samples_terminal_set: int = 100, @@ -40,7 +41,9 @@ def __init__(self, additional_constraints: list = None, use_terminal_set: bool = True, learn_terminal_set: bool = False, - cost_function: str = Cost_Function.ONE_STEP_COST, + cost_function: Cost_Function = Cost_Function.ONE_STEP_COST, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, **kwargs ): '''Initialize the MPSC. @@ -48,7 +51,7 @@ def __init__(self, Args: env_func (partial gym.Env): Environment for the task. horizon (int): The MPC horizon. - q_lin, r_lin (list): Q and R gain matrices for linear controller. + q_mpc, r_mpc (list): Q and R gain matrices for linear controller. integration_algo (str): The algorithm used for integrating the dynamics, either 'LTI', 'rk4', 'rk', or 'cvodes'. n_samples (int): Number of samples to create W set. @@ -58,7 +61,9 @@ def __init__(self, additional_constraints (list): List of additional constraints to consider. 
use_terminal_set (bool): Whether to use a terminal set constraint or not. learn_terminal_set (bool): Whether to learn a terminal set or not. - cost_function (str): A string (from Cost_Function) representing the cost function to be used. + cost_function (Cost_Function): A string (from Cost_Function) representing the cost function to be used. + mpsc_cost_horizon (int): How many steps forward to check for constraint violations. + decay_factor (float): How much to discount future costs. ''' # Store all params/args. @@ -66,7 +71,7 @@ def __init__(self, if key != 'self' and key != 'kwargs' and '__' not in key: self.__dict__[key] = value - super().__init__(env_func, horizon, q_lin, r_lin, integration_algo, warmstart, additional_constraints, use_terminal_set, cost_function, **kwargs) + super().__init__(env_func, horizon, q_mpc, r_mpc, integration_algo, warmstart, additional_constraints, use_terminal_set, cost_function, mpsc_cost_horizon, decay_factor, **kwargs) self.terminal_set_verts = None @@ -79,7 +84,7 @@ def set_dynamics(self): dfdu = dfdxdfdu['dfdu'].toarray() delta_x = cs.MX.sym('delta_x', self.model.nx, 1) delta_u = cs.MX.sym('delta_u', self.model.nu, 1) - self.discrete_dfdx, self.discrete_dfdu = discretize_linear_system(dfdx, dfdu, self.dt) + self.discrete_dfdx, self.discrete_dfdu = discretize_linear_system(dfdx, dfdu, self.dt, exact=True) if self.integration_algo == 'LTI': x_dot_lin_vec = self.discrete_dfdx @ delta_x + self.discrete_dfdu @ delta_u @@ -176,6 +181,8 @@ def learn(self, self.setup_optimizer() self.terminal_set_verts = points + self.terminal_constraint = LinearConstraint(env=self.env, A=self.terminal_set.A, b=self.terminal_set.b, constrained_variable=ConstrainedVariableType.STATE) + self.terminal_constraint = self.terminal_constraint.get_symbolic_model() def load(self, path, @@ -247,8 +254,13 @@ def tighten_state_and_input_constraints(self): if self.training_env.NAME == Environment.QUADROTOR: min_input = input_constraint.lower_bounds[0] + np.max(self.U_vertices) - np.max(self.tightened_input_constraint_verts) self.tightened_input_constraint_verts = np.clip(self.tightened_input_constraint_verts, min_input, 100) - self.tightened_input_constraint = tightened_input_constr_func(env=self.env, - constrained_variable=ConstrainedVariableType.INPUT) + self.tightened_input_constraint = BoundedConstraint(env=self.env, + lower_bounds=np.min(self.tightened_input_constraint_verts, axis=0), + upper_bounds=np.max(self.tightened_input_constraint_verts, axis=0), + constrained_variable=ConstrainedVariableType.INPUT) + else: + self.tightened_input_constraint = tightened_input_constr_func(env=self.env, + constrained_variable=ConstrainedVariableType.INPUT) # Get the state constraint vertices. state_constraints = self.constraints.state_constraints if len(state_constraints) > 1: @@ -261,12 +273,11 @@ def tighten_state_and_input_constraints(self): self.tightened_state_constraint = tightened_state_constraint_func(env=self.env, constrained_variable=ConstrainedVariableType.STATE) - self.simple_terminal_set = QuadraticContstraint(env=self.env, - P=np.eye(self.model.nx), - b=self.env.TASK_INFO['stabilization_goal_tolerance'], - constrained_variable=ConstrainedVariableType.STATE) + def setup_acados_optimizer(self): + '''Setup the certifying MPC problem.''' + raise NotImplementedError - def setup_optimizer(self): + def setup_casadi_optimizer(self): '''Setup the certifying MPC problem.''' # Horizon parameter. 
@@ -292,11 +303,13 @@ def setup_optimizer(self): elif self.env.TASK == Task.TRAJ_TRACKING: X_GOAL = opti.parameter(self.horizon, nx) + slack = opti.variable(1, 1) # TODO: Add slack + opti.subject_to(slack == 0) + # Constraints (currently only handles a single constraint for state and input). state_constraints = self.tightened_state_constraint.get_symbolic_model() input_constraints = self.tightened_input_constraint.get_symbolic_model() omega_constraint = self.omega_constraint.get_symbolic_model() - simple_terminal_constraint = self.simple_terminal_set.get_symbolic_model() for i in range(self.horizon): # Dynamics constraints (eqn 5.b). next_state = self.dynamics_func(x0=z_var[:, i], p=v_var[:, i])['xf'] @@ -307,13 +320,8 @@ def setup_optimizer(self): opti.subject_to(state_constraints(z_var[:, i] + X_EQ) <= 0) # Final state constraints (5.d). - if self.use_terminal_set: - if self.terminal_set is not None: - terminal_constraint = LinearConstraint(env=self.env, A=self.terminal_set.A, b=self.terminal_set.b, constrained_variable=ConstrainedVariableType.STATE) - terminal_constraint = terminal_constraint.get_symbolic_model() - opti.subject_to(terminal_constraint(z_var[:, -1]) <= 0) - else: - opti.subject_to(simple_terminal_constraint(z_var[:, -1]) <= 0) + if self.use_terminal_set and self.terminal_set is not None: + opti.subject_to(self.terminal_constraint(z_var[:, -1]) <= 0) # Initial state constraints (5.e). opti.subject_to(omega_constraint(x_init - z_var[:, 0]) <= 0) @@ -338,11 +346,13 @@ def setup_optimizer(self): 'x_init': x_init, 'X_EQ': X_EQ, 'X_GOAL': X_GOAL, + 'slack': slack, } # Cost (# eqn 5.a, note: using 2norm or sqrt makes this infeasible). cost = self.cost_function.get_cost(self.opti_dict) opti.minimize(cost) + self.opti_dict['cost'] = cost def before_optimization(self, obs): '''Setup the optimization. diff --git a/safe_control_gym/safety_filters/mpsc/mpsc.py b/safe_control_gym/safety_filters/mpsc/mpsc.py index 34307fe8e..251c58985 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc.py @@ -17,6 +17,7 @@ from safe_control_gym.controllers.mpc.mpc_utils import get_cost_weight_matrix, reset_constraints from safe_control_gym.safety_filters.base_safety_filter import BaseSafetyFilter from safe_control_gym.safety_filters.mpsc.mpsc_cost_function.one_step_cost import ONE_STEP_COST +from safe_control_gym.safety_filters.mpsc.mpsc_cost_function.precomputed_cost import PRECOMPUTED_COST from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function, get_trajectory_on_horizon @@ -26,13 +27,16 @@ class MPSC(BaseSafetyFilter, ABC): def __init__(self, env_func, horizon: int = 10, - q_lin: list = None, - r_lin: list = None, + q_mpc: list = None, + r_mpc: list = None, integration_algo: str = 'rk4', warmstart: bool = True, additional_constraints: list = None, use_terminal_set: bool = True, cost_function: Cost_Function = Cost_Function.ONE_STEP_COST, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, + use_acados: bool = False, **kwargs ): '''Initialize the MPSC. @@ -40,13 +44,15 @@ def __init__(self, Args: env_func (partial BenchmarkEnv): Environment for the task. horizon (int): The MPC horizon. - q_lin, r_lin (list): Q and R gain matrices for linear controller. + q_mpc, r_mpc (list): Q and R gain matrices for linear controller. integration_algo (str): The algorithm used for integrating the dynamics, either 'LTI', 'rk4', 'rk', or 'cvodes'. warmstart (bool): If the previous MPC soln should be used to warmstart the next mpc step. 
additional_constraints (list): List of additional constraints to consider. use_terminal_set (bool): Whether to use a terminal set constraint or not. cost_function (Cost_Function): A string (from Cost_Function) representing the cost function to be used. + mpsc_cost_horizon (int): How many steps forward to check for constraint violations. + decay_factor (float): How much to discount future costs. ''' # Store all params/args. @@ -62,15 +68,14 @@ def __init__(self, self.env = env_func(normalized_rl_action_space=False) self.training_env = env_func(randomized_init=True, init_state=None, - cost='quadratic', normalized_rl_action_space=False, ) # Setup attributes. self.reset() self.dt = self.model.dt - self.Q = get_cost_weight_matrix(q_lin, self.model.nx) - self.R = get_cost_weight_matrix(r_lin, self.model.nu) + self.Q = get_cost_weight_matrix(q_mpc, self.model.nx) + self.R = get_cost_weight_matrix(r_mpc, self.model.nu) self.X_EQ = np.zeros(self.model.nx) self.U_EQ = self.model.U_EQ @@ -79,6 +84,7 @@ def __init__(self, self.lqr_gain = -compute_lqr_gain(self.model, self.X_EQ, self.U_EQ, self.Q, self.R, discrete_dynamics=True) self.terminal_set = None + self.prev_action = self.U_EQ if self.additional_constraints is None: additional_constraints = [] @@ -87,6 +93,10 @@ def __init__(self, if cost_function == Cost_Function.ONE_STEP_COST: self.cost_function = ONE_STEP_COST() + self.mpsc_cost_horizon = 1 + self.cost_function.mpsc_cost_horizon = 1 + elif cost_function == Cost_Function.PRECOMPUTED_COST: + self.cost_function = PRECOMPUTED_COST(self.env, mpsc_cost_horizon, decay_factor, self.output_dir) else: raise NotImplementedError(f'The MPSC cost function {cost_function} has not been implemented') @@ -95,9 +105,21 @@ def set_dynamics(self): '''Compute the dynamics.''' raise NotImplementedError - @abstractmethod def setup_optimizer(self): '''Setup the certifying MPC problem.''' + if self.use_acados: + self.setup_acados_optimizer() + else: + self.setup_casadi_optimizer() + + @abstractmethod + def setup_casadi_optimizer(self): + '''Setup the certifying MPC problem using CasADi.''' + raise NotImplementedError + + @abstractmethod + def setup_acados_optimizer(self): + '''Setup the certifying MPC problem using ACADOS.''' raise NotImplementedError def before_optimization(self, obs): @@ -125,6 +147,28 @@ def solve_optimization(self, feasible (bool): Whether the safety filtering was feasible or not. ''' + if self.use_acados: + action, feasible = self.solve_acados_optimization(obs, uncertified_action, iteration) + else: + action, feasible = self.solve_casadi_optimization(obs, uncertified_action, iteration) + return action, feasible + + def solve_casadi_optimization(self, + obs, + uncertified_action, + iteration=None, + ): + '''Solve the MPC optimization problem for a given observation and uncertified input. + + Args: + obs (ndarray): Current state/observation. + uncertified_action (ndarray): The uncertified_controller's action. + iteration (int): The current iteration, used for trajectory tracking. + + Returns: + action (ndarray): The certified action. + feasible (bool): Whether the safety filtering was feasible or not. + ''' opti_dict = self.opti_dict opti = opti_dict['opti'] z_var = opti_dict['z_var'] @@ -153,6 +197,8 @@ def solve_optimization(self, # Solve the optimization problem. 
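Throughout this patch, q_lin/r_lin become q_mpc/r_mpc; both feed get_cost_weight_matrix, which, as used here, expands either a single scalar or one weight per dimension into a diagonal matrix. A sketch under that assumption:

import numpy as np

def cost_weight_matrix(weights, dim):
    # One weight broadcast to all dimensions, or one weight per dimension.
    if len(weights) == dim:
        return np.diag(weights)
    assert len(weights) == 1, 'expected one weight or one per dimension'
    return np.diag(weights * dim)

Q = cost_weight_matrix([1.0], 6)        # state weights, nx = 6
R = cost_weight_matrix([3.0, 3.0], 2)   # input weights, nu = 2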
try: sol = opti.solve() + self.cost_prev = sol.value(opti_dict['cost']) + self.slack_prev = sol.value(opti_dict['slack']) x_val, u_val, next_u_val = sol.value(z_var), sol.value(v_var), sol.value(next_u) self.z_prev = x_val self.v_prev = u_val.reshape((self.model.nu), self.horizon) @@ -168,6 +214,56 @@ def solve_optimization(self, action = None return action, feasible + def solve_acados_optimization(self, + obs, + uncertified_action, + iteration=None, + ): + '''Solve the MPC optimization problem for a given observation and uncertified input. + + Args: + obs (ndarray): Current state/observation. + uncertified_action (ndarray): The uncertified_controller's action. + iteration (int): The current iteration, used for trajectory tracking. + + Returns: + action (ndarray): The certified action. + feasible (bool): Whether the safety filtering was feasible or not. + ''' + + ocp_solver = self.ocp_solver + ocp_solver.cost_set(0, 'yref', np.concatenate((np.zeros((self.model.nx)), np.atleast_1d(np.squeeze(uncertified_action))))) + + if isinstance(self.cost_function, PRECOMPUTED_COST): + uncert_input_traj = self.cost_function.calculate_unsafe_path(obs, uncertified_action, iteration) + + for stage in range(1, self.mpsc_cost_horizon): + ocp_solver.cost_set(stage, 'yref', np.concatenate((np.zeros((self.model.nx)), uncert_input_traj[:, stage]))) + + # Solve the optimization problem. + try: + action = ocp_solver.solve_for_x0(x0_bar=obs) + self.cost_prev = ocp_solver.get_cost() + self.slack_prev = np.zeros((self.horizon, self.model.nx + self.model.nu)) + x_val = np.zeros((self.horizon + 1, self.model.nx)) + u_val = np.zeros((self.horizon, self.model.nu)) + for i in range(self.horizon): + # self.slack_prev[i, :] = ocp_solver.get(i, 'su') + x_val[i, :] = ocp_solver.get(i, 'x') + u_val[i, :] = ocp_solver.get(i, 'u') + x_val[self.horizon, :] = ocp_solver.get(self.horizon, 'x') + self.z_prev = x_val.T + self.v_prev = u_val.T + # Take the first one from solved action sequence. + self.prev_action = action + feasible = True + except Exception as e: + print('Error Return Status:', ocp_solver.status) + print(e) + feasible = False + action = None + return action, feasible + def certify_action(self, current_state, uncertified_action, @@ -254,5 +350,6 @@ def reset_before_run(self, env=None): ''' self.z_prev = None self.v_prev = None + self.slack_prev = 0 self.kinf = self.horizon - 1 self.setup_results_dict() diff --git a/safe_control_gym/safety_filters/mpsc/mpsc.yaml b/safe_control_gym/safety_filters/mpsc/mpsc.yaml index 9d2798d0c..9640712bd 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc.yaml +++ b/safe_control_gym/safety_filters/mpsc/mpsc.yaml @@ -1,7 +1,7 @@ # LQR controller parameters -r_lin: +r_mpc: - 1. -q_lin: +q_mpc: - 1. 
# MPC Parameters @@ -10,19 +10,7 @@ warmstart: False integration_algo: rk4 use_terminal_set: False -# Prior info -prior_info: - prior_prop: null - randomize_prior_prop: False - prior_prop_rand_info: null - -# Safe set calculation -n_samples: 600 -n_samples_terminal_set: 10 -learn_terminal_set: False - -# Tau parameter for the calcuation of the RPI -tau: 0.95 - # Cost function cost_function: one_step_cost +mpsc_cost_horizon: 5 +decay_factor: 0.85 diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_acados.py b/safe_control_gym/safety_filters/mpsc/mpsc_acados.py new file mode 100644 index 000000000..44692a32e --- /dev/null +++ b/safe_control_gym/safety_filters/mpsc/mpsc_acados.py @@ -0,0 +1,276 @@ +'''Model Predictive Safety Certification using Acados.''' +import os +import shutil +from datetime import datetime + +import numpy as np +import scipy +from acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver + +from safe_control_gym.controllers.mpc.mpc_utils import set_acados_constraint_bound +from safe_control_gym.safety_filters.mpsc.mpsc import MPSC +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function, box2polytopic +from safe_control_gym.utils.utils import timing + + +class MPSC_ACADOS(MPSC): + '''MPSC with full nonlinear model.''' + + def __init__( + self, + env_func, + horizon: int = 5, + q_mpc: list = [1], + r_mpc: list = [1], + integration_algo: str = 'rk4', + warmstart: bool = True, + additional_constraints: list = None, + use_terminal_set: bool = True, + soften_constraints: bool = False, + slack_cost: float = 1, + constraint_tol: float = 1e-6, + seed: int = 0, + use_RTI: bool = False, + cost_function: Cost_Function = Cost_Function.ONE_STEP_COST, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, + **kwargs + ): + '''Creates task and controller. + + Args: + env_func (Callable): function to instantiate task/environment. + horizon (int): mpc planning horizon. + q_mpc (list): diagonals of state cost weight. + r_mpc (list): diagonals of input/action cost weight. + warmstart (bool): if to initialize from previous iteration. + soften_constraints (bool): Formulate the constraints as soft constraints. + constraint_tol (float): Tolerance to add the the constraint as sometimes solvers are not exact. + seed (int): random seed. + use_RTI (bool): Real-time iteration for acados. + ''' + for k, v in locals().items(): + if k != 'self' and k != 'kwargs' and '__' not in k: + self.__dict__.update({k: v}) + + super().__init__( + env_func, + horizon, + q_mpc, + r_mpc, + integration_algo, + warmstart, + additional_constraints, + use_terminal_set, + cost_function, + mpsc_cost_horizon, + decay_factor, + **kwargs) + + # acados settings + self.use_RTI = use_RTI + + self.n = self.model.nx + self.m = self.model.nu + self.q = self.model.nx + + self.state_constraint = self.constraints.state_constraints[0] + self.input_constraint = self.constraints.input_constraints[0] + + [self.X_mid, L_x, l_x] = box2polytopic(self.state_constraint) + [self.U_mid, L_u, l_u] = box2polytopic(self.input_constraint) + + # number of constraints + p_x = l_x.shape[0] + p_u = l_u.shape[0] + self.p = p_x + p_u + + self.L_x = np.vstack((L_x, np.zeros((p_u, self.n)))) + self.L_u = np.vstack((np.zeros((p_x, self.m)), L_u)) + self.l_xu = np.concatenate([l_x, l_u]) + + # Dynamics model. + self.setup_acados_model() + # Acados optimizer. 
+ self.setup_acados_optimizer() + + @timing + def reset(self): + '''Prepares for training or evaluation.''' + print('Resetting MPC') + super().reset() + if hasattr(self, 'acados_model'): + del self.acados_model + if hasattr(self, 'ocp'): + del self.ocp + if hasattr(self, 'acados_ocp_solver'): + del self.acados_ocp_solver + + # delete the generated c code directory + if os.path.exists('./c_generated_code'): + print('deleting the generated MPC c code directory') + shutil.rmtree('./c_generated_code') + assert not os.path.exists('./c_generated_code'), 'Failed to delete the generated c code directory' + + def setup_acados_model(self) -> AcadosModel: + '''Sets up symbolic model for acados.''' + acados_model = AcadosModel() + acados_model.x = self.model.x_sym + acados_model.u = self.model.u_sym + acados_model.name = self.env.NAME + + # set up rk4 (acados need symbolic expression of dynamics, not function) + k1 = self.model.fc_func(acados_model.x, acados_model.u) + k2 = self.model.fc_func(acados_model.x + self.dt / 2 * k1, acados_model.u) + k3 = self.model.fc_func(acados_model.x + self.dt / 2 * k2, acados_model.u) + k4 = self.model.fc_func(acados_model.x + self.dt * k3, acados_model.u) + f_disc = acados_model.x + self.dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4) + + acados_model.disc_dyn_expr = f_disc + + # store meta information # NOTE: unit is missing + acados_model.x_labels = self.env.STATE_LABELS + acados_model.u_labels = self.env.ACTION_LABELS + acados_model.t_label = 'time' + # get current time stamp in $ymd_HMS format + current_time = datetime.now().strftime('%Y%m%d_%H%M%S') + acados_model.name = self.env.NAME + '_' + current_time + + self.acados_model = acados_model + + def set_dynamics(self): + pass + + def setup_casadi_optimizer(self): + pass + + def setup_acados_optimizer(self): + '''Sets up nonlinear optimization problem.''' + nx, nu = self.model.nx, self.model.nu + ny = nx + nu + ny_e = nx + + # create ocp object to formulate the OCP + ocp = AcadosOcp() + ocp.model = self.acados_model + + # set dimensions + ocp.dims.N = self.horizon # prediction horizon + + # set cost (NOTE: safe-control-gym uses quadratic cost) + ocp.cost.cost_type = 'LINEAR_LS' + ocp.cost.cost_type_e = 'LINEAR_LS' + + Q_mat = np.zeros((nx, nx)) + R_mat = np.eye(nu) + ocp.cost.W_e = np.zeros((nx, nx)) + ocp.cost.W = scipy.linalg.block_diag(Q_mat, R_mat) + + ocp.cost.Vx = np.zeros((ny, nx)) + ocp.cost.Vu = np.zeros((ny, nu)) + ocp.cost.Vu[nx:nx + nu, :] = np.eye(nu) + ocp.cost.Vx_e = np.eye(nx) + + # placeholder y_ref and y_ref_e (will be set in select_action) + ocp.cost.yref = np.zeros((ny, )) + ocp.cost.yref_e = np.zeros((ny_e, )) + + # set up solver options + ocp.solver_options.qp_solver = 'PARTIAL_CONDENSING_HPIPM' + ocp.solver_options.hessian_approx = 'GAUSS_NEWTON' + ocp.solver_options.integrator_type = 'DISCRETE' + ocp.solver_options.nlp_solver_type = 'SQP' if not self.use_RTI else 'SQP_RTI' + ocp.solver_options.nlp_solver_max_iter = 25 if not self.use_RTI else 1 + ocp.solver_options.tf = self.horizon * self.dt # prediction horizon + + # set constraints + ocp.constraints.constr_type = 'BGH' + ocp.constraints.constr_type_e = 'BGH' + + ocp.constraints.x0 = self.model.X_EQ + ocp.constraints.C = self.L_x + ocp.constraints.D = self.L_u + ocp.constraints.lg = -1000 * np.ones((self.p)) + ocp.constraints.ug = np.zeros((self.p)) + + # slack costs for nonlinear constraints + if self.soften_constraints: + # slack variables for all constraints + ocp.constraints.Jsg = np.eye(self.p) + # slack penalty + ocp.cost.Zu = self.slack_cost 
* np.ones(self.p) + ocp.cost.Zl = self.slack_cost * np.ones(self.p) + ocp.cost.zl = self.slack_cost * np.ones(self.p) + ocp.cost.zu = self.slack_cost * np.ones(self.p) + + solver_json = 'acados_ocp_mpsf.json' + ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json, generate=True, build=True) + + for stage in range(self.mpsc_cost_horizon): + ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W) + + for stage in range(self.mpsc_cost_horizon, self.horizon): + ocp_solver.cost_set(stage, 'W', 0 * ocp.cost.W) + + g = np.zeros((self.horizon, self.p)) + for i in range(self.horizon): + for j in range(self.p): + g[i, j] = self.l_xu[j] + g[i, :] += (self.L_x @ self.X_mid) + (self.L_u @ self.U_mid) + ocp_solver.constraints_set(i, 'ug', g[i, :]) + + self.ocp_solver = ocp_solver + self.ocp = ocp + + def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_expr, he_expr) -> AcadosOcp: + '''Preprocess the constraints to be compatible with acados. + Args: + ocp (AcadosOcp): acados ocp object + h0_expr (casadi expression): initial state constraints + h_expr (casadi expression): state and input constraints + he_expr (casadi expression): terminal state constraints + Returns: + ocp (AcadosOcp): acados ocp object with constraints set. + + An alternative way to set the constraints is to use bounded constraints of acados: + # bounded input constraints + idxbu = np.where(np.sum(self.env.constraints.input_constraints[0].constraint_filter, axis=0) != 0)[0] + ocp.constraints.Jbu = np.eye(nu) + ocp.constraints.lbu = self.env.constraints.input_constraints[0].lower_bounds + ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds + ocp.constraints.idxbu = idxbu # active constraints dimension + ''' + + ub = {'h': set_acados_constraint_bound(h_expr, 'ub', self.constraint_tol), + 'h0': set_acados_constraint_bound(h0_expr, 'ub', self.constraint_tol), + 'he': set_acados_constraint_bound(he_expr, 'ub', self.constraint_tol), } + + lb = {'h': set_acados_constraint_bound(h_expr, 'lb'), + 'h0': set_acados_constraint_bound(h0_expr, 'lb'), + 'he': set_acados_constraint_bound(he_expr, 'lb'), } + + # make sure all the ub and lb are 1D numpy arrays + # (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche) + for key in ub.keys(): + ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key] + lb[key] = lb[key].flatten() if lb[key].ndim != 1 else lb[key] + # check ub and lb dimensions + for key in ub.keys(): + assert ub[key].ndim == 1, f'ub[{key}] is not 1D numpy array' + assert lb[key].ndim == 1, f'lb[{key}] is not 1D numpy array' + assert ub['h'].shape == lb['h'].shape, 'h_ub and h_lb have different shapes' + + # pass the constraints to the ocp object + ocp.model.con_h_expr_0, ocp.model.con_h_expr, ocp.model.con_h_expr_e = \ + h0_expr, h_expr, he_expr + ocp.dims.nh_0, ocp.dims.nh, ocp.dims.nh_e = \ + h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0] + # assign constraints upper and lower bounds + ocp.constraints.uh_0 = ub['h0'] + ocp.constraints.lh_0 = lb['h0'] + ocp.constraints.uh = ub['h'] + ocp.constraints.lh = lb['h'] + ocp.constraints.uh_e = ub['he'] + ocp.constraints.lh_e = lb['he'] + + return ocp diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py index 3980790c2..cc9db996a 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py +++ 
b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/abstract_cost.py @@ -10,11 +10,15 @@ class MPSC_COST(ABC): def __init__(self, env: BenchmarkEnv = None, + mpsc_cost_horizon: int = 1, + decay_factor: float = 0.85, ): '''Initialize the MPSC Cost. Args: env (BenchmarkEnv): Environment for the task. + mpsc_cost_horizon (int): How many steps forward to check for constraint violations. + decay_factor (float): How much to discount future costs. ''' self.env = env @@ -22,6 +26,9 @@ def __init__(self, # Setup attributes. self.model = self.env.symbolic if env is not None else None + self.mpsc_cost_horizon = mpsc_cost_horizon + self.decay_factor = decay_factor + @abstractmethod def get_cost(self, opti_dict): '''Returns the cost function for the MPSC optimization in symbolic form. diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py new file mode 100644 index 000000000..b6a1acc4e --- /dev/null +++ b/safe_control_gym/safety_filters/mpsc/mpsc_cost_function/precomputed_cost.py @@ -0,0 +1,127 @@ +'''Precomputed Cost Function for Smooth MPSC.''' + +import numpy as np + +from safe_control_gym.controllers.pid.pid import PID +from safe_control_gym.envs.env_wrappers.vectorized_env.vec_env import VecEnv +from safe_control_gym.safety_filters.mpsc.mpsc_cost_function.abstract_cost import MPSC_COST + + +class PRECOMPUTED_COST(MPSC_COST): + '''Precomputed future states MPSC Cost Function.''' + + def __init__(self, + env, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, + output_dir: str = '.', + ): + '''Initialize the MPSC Cost. + + Args: + env (BenchmarkEnv): Environment for the task. + mpsc_cost_horizon (int): How many steps forward to check for constraint violations. + decay_factor (float): How much to discount future costs. + output_dir (str): Folder to write outputs. + ''' + + super().__init__(env, mpsc_cost_horizon, decay_factor) + + self.output_dir = output_dir + self.uncertified_controller = None + + def get_cost(self, opti_dict): + '''Returns the cost function for the MPSC optimization in symbolic form. + + Args: + opti_dict (dict): The dictionary of optimization variables. + + Returns: + cost (casadi symbolic expression): The symbolic cost function using casadi. + ''' + + opti = opti_dict['opti'] + next_u = opti_dict['next_u'] + u_L = opti_dict['u_L'] + v_var = opti_dict['v_var'] + + v_L = opti.parameter(self.model.nu, self.mpsc_cost_horizon) + + opti_dict['v_L'] = v_L + + cost = (u_L - next_u).T @ (u_L - next_u) + for h in range(1, self.mpsc_cost_horizon): + cost += (self.decay_factor**h) * (v_L[:, h] - v_var[:, h]).T @ (v_L[:, h] - v_var[:, h]) + + return cost + + def prepare_cost_variables(self, opti_dict, obs, iteration): + '''Prepares all the symbolic variable initial values for the next optimization. + + Args: + opti_dict (dict): The dictionary of optimization variables. + obs (ndarray): Current state/observation. + iteration (int): The current iteration, used for trajectory tracking. + ''' + + opti = opti_dict['opti'] + v_L = opti_dict['v_L'] + u_L = opti_dict['u_L'] + + uncertified_action = opti.value(u_L, opti.initial()) + + expected_inputs = self.calculate_unsafe_path(obs, uncertified_action, iteration) + opti.set_value(v_L, expected_inputs) + + def calculate_unsafe_path(self, obs, uncertified_action, iteration): + '''Precomputes the likely actions the uncertified controller will take. + + Args: + obs (ndarray): Current state/observation. 
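get_cost in PRECOMPUTED_COST above is a decay-weighted deviation from the uncertified controller's planned input sequence: cost = ||u_L - next_u||^2 + sum over h = 1..H-1 of decay^h * ||v_L[:, h] - v_var[:, h]||^2. The same sum evaluated numerically (hypothetical values):

import numpy as np

decay_factor, H, nu = 0.85, 5, 2
u_L = np.array([0.30, 0.00])     # uncertified action at step 0
next_u = np.array([0.28, 0.01])  # certified action at step 0
v_L = np.zeros((nu, H))          # planned uncertified inputs over the horizon
v_var = 0.05 * np.ones((nu, H))  # candidate certified inputs over the horizon

cost = float((u_L - next_u) @ (u_L - next_u))
for h in range(1, H):
    diff = v_L[:, h] - v_var[:, h]
    cost += (decay_factor ** h) * float(diff @ diff)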
+ uncertified_action (ndarray): The uncertified_controller's action. + iteration (int): The current iteration, used for trajectory tracking. + + Returns: + v_L (ndarray): The estimated future actions taken by the uncertified_controller. + ''' + + if self.uncertified_controller is None: + raise Exception('[ERROR] No underlying controller passed to P_MPSC') + + if isinstance(self.uncertified_controller.env, VecEnv): + uncert_env = self.uncertified_controller.env.envs[0] + else: + uncert_env = self.uncertified_controller.env + + v_L = np.zeros((self.model.nu, self.mpsc_cost_horizon)) + + if isinstance(self.uncertified_controller, PID): + self.uncertified_controller.save(f'{self.output_dir}/temp-data/saved_controller_curr.npy') + self.uncertified_controller.load(f'{self.output_dir}/temp-data/saved_controller_prev.npy') + + for h in range(self.mpsc_cost_horizon): + next_step = min(iteration + h, self.env.X_GOAL.shape[0] - 1) + # Concatenate goal info (goal state(s)) for RL + extended_obs = self.env.extend_obs(obs, next_step + 1) + + info = {'current_step': next_step} + + action = self.uncertified_controller.select_action(obs=extended_obs, info=info) + + if uncert_env.NORMALIZED_RL_ACTION_SPACE: + action = uncert_env.denormalize_action(action) + + action = np.clip(action, self.env.physical_action_bounds[0], self.env.physical_action_bounds[1]) + + if h == 0 and np.linalg.norm(uncertified_action - action) >= 0.001: + raise ValueError(f'[ERROR] Mismatch between unsafe controller and MPSC guess. Uncert: {uncertified_action}, Guess: {action}, Diff: {np.linalg.norm(uncertified_action - action)}.') + + v_L[:, h:h + 1] = action.reshape((self.model.nu, 1)) + + obs = np.squeeze(self.model.fd_func(x0=obs, p=action)['xf'].toarray()) + + if isinstance(self.uncertified_controller, PID): + self.uncertified_controller.load(f'{self.output_dir}/temp-data/saved_controller_curr.npy') + self.uncertified_controller.save(f'{self.output_dir}/temp-data/saved_controller_prev.npy') + + return v_L diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_utils.py b/safe_control_gym/safety_filters/mpsc/mpsc_utils.py index 98bfd40be..0a66435a4 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc_utils.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc_utils.py @@ -16,6 +16,7 @@ class Cost_Function(str, Enum): '''MPSC Cost functions enumeration class.''' ONE_STEP_COST = 'one_step_cost' # Default MPSC cost function. + PRECOMPUTED_COST = 'precomputed_cost' # Smooth cost based on precomputed future states def compute_RPI_set(Acl, @@ -58,8 +59,7 @@ def compute_RPI_set(Acl, except cp.SolverError: msg = '[ERROR] RPI Computation failed. Ensure you have the MOSEK solver. Otherwise, error unknown.' print(msg) - raise Exception(msg) from None - # exit() + raise Exception(msg) from None return P.value @@ -144,3 +144,58 @@ def get_trajectory_on_horizon(env, iteration, horizon): clipped_X_GOAL = env.X_GOAL return clipped_X_GOAL + + +def get_discrete_derivative(signal, ctrl_freq): + '''Calculates the discrete derivative of a signal. + + Args: + signal (np.ndarray): A array of values. + + Returns: + discrete_derivative (float): The discrete_derivative of the signal. + ''' + discrete_derivative = (signal[1:, :] - signal[:-1, :]) * ctrl_freq + return discrete_derivative + + +def box2polytopic(constraint): + '''Convert constraints into an explicit polytopic form. This assumes that constraints contain the origin. + + Args: + constraint (Constraint): The constraint to be converted. + + Returns: + L (ndarray): The polytopic matrix. 
+
+
+def box2polytopic(constraint):
+    '''Convert constraints into an explicit polytopic form. This assumes that constraints contain the origin.
+
+    Args:
+        constraint (Constraint): The constraint to be converted.
+
+    Returns:
+        Z_mid (ndarray): The midpoint of the constraint box.
+        L (ndarray): The polytopic matrix.
+        l (ndarray): A flag per row of L marking whether that constraint is active (1) or degenerate (0).
+    '''
+
+    Limit = []
+    limit_active = []
+
+    Z_mid = (constraint.upper_bounds + constraint.lower_bounds) / 2.0
+    Z_limits = np.array([[constraint.upper_bounds[i] - Z_mid[i], constraint.lower_bounds[i] - Z_mid[i]] for i in range(constraint.upper_bounds.shape[0])])
+
+    dim = Z_limits.shape[0]
+    eye_dim = np.eye(dim)
+
+    for constraint_id in range(0, dim):
+        if Z_limits[constraint_id, 0] != -float('inf'):
+            if Z_limits[constraint_id, 0] == 0:
+                limit_active += [0]
+                Limit += [-eye_dim[constraint_id, :]]
+            else:
+                limit_active += [1]
+                factor = 1 / Z_limits[constraint_id, 0]
+                Limit += [factor * eye_dim[constraint_id, :]]
+
+        if Z_limits[constraint_id, 1] != float('inf'):
+            if Z_limits[constraint_id, 1] == 0:
+                limit_active += [0]
+                Limit += [eye_dim[constraint_id, :]]
+            else:
+                limit_active += [1]
+                factor = 1 / Z_limits[constraint_id, 1]
+                Limit += [factor * eye_dim[constraint_id, :]]
+
+    return Z_mid, np.array(Limit), np.array(limit_active)
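As a sanity check, the conversion can be exercised standalone; Box below is a hypothetical stand-in for the Constraint object, which only needs upper_bounds and lower_bounds. After normalization, every face of the box satisfies L @ (z - Z_mid) <= 1.

```python
import numpy as np

# Hypothetical stand-in for the Constraint object box2polytopic expects.
class Box:
    def __init__(self, lower, upper):
        self.lower_bounds = np.asarray(lower, dtype=float)
        self.upper_bounds = np.asarray(upper, dtype=float)

constraint = Box(lower=[-0.9, -2.0], upper=[0.9, 2.0])

# Mirror of the conversion above: center the box at Z_mid, then express each
# face (upper then lower, per dimension) as a normalized row of L.
Z_mid = (constraint.upper_bounds + constraint.lower_bounds) / 2.0
rows = []
for i in range(2):
    rows.append(np.eye(2)[i] / (constraint.upper_bounds[i] - Z_mid[i]))  # upper face
    rows.append(np.eye(2)[i] / (constraint.lower_bounds[i] - Z_mid[i]))  # lower face
L = np.vstack(rows)

z = np.array([0.5, -1.0])                  # a state inside the box
print(np.all(L @ (z - Z_mid) <= 1.0))      # True: all faces satisfied
z_out = np.array([1.0, 0.0])               # first state exceeds its upper bound
print(np.all(L @ (z_out - Z_mid) <= 1.0))  # False
```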
diff --git a/safe_control_gym/safety_filters/mpsc/nl_mpsc.py b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py
new file mode 100644
index 000000000..de1f40677
--- /dev/null
+++ b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py
@@ -0,0 +1,1016 @@
+'''NL Model Predictive Safety Certification (NL MPSC).
+
+The core idea is that any learning controller input can be either certified as safe or, if not safe, corrected
+using an MPC controller based on Robust NL MPC.
+
+Based on
+    * K.P. Wabersich and M.N. Zeilinger 'Linear model predictive safety certification for learning-based control' 2019
+      https://arxiv.org/pdf/1803.08552.pdf
+    * J. Köhler, R. Soloperto, M. A. Müller, and F. Allgöwer, “A computationally efficient robust model predictive
+      control framework for uncertain nonlinear systems -- extended version,” IEEE Trans. Automat. Contr., vol. 66,
+      no. 2, pp. 794-801, Feb. 2021, doi: 10.1109/TAC.2020.2982585. http://arxiv.org/abs/1910.12081
+'''
+
+import pickle
+
+import casadi as cs
+import cvxpy as cp
+import numpy as np
+from acados_template import AcadosOcp, AcadosOcpSolver
+from acados_template.acados_model import AcadosModel
+from pytope import Polytope
+from scipy.linalg import block_diag, solve_discrete_are, sqrtm
+
+from safe_control_gym.controllers.mpc.mpc_utils import discretize_linear_system, rk_discrete
+from safe_control_gym.envs.benchmark_env import Environment, Task
+from safe_control_gym.safety_filters.cbf.cbf_utils import cartesian_product
+from safe_control_gym.safety_filters.mpsc.mpsc import MPSC
+from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function, box2polytopic
+
+
+class NL_MPSC(MPSC):
+    '''Model Predictive Safety Certification Class.'''
+
+    def __init__(self,
+                 env_func,
+                 horizon: int = 10,
+                 q_mpc: list = None,
+                 r_mpc: list = None,
+                 integration_algo: str = 'rk4',
+                 warmstart: bool = True,
+                 additional_constraints: list = None,
+                 use_terminal_set: bool = True,
+                 n_samples: int = 600,
+                 cost_function: Cost_Function = Cost_Function.ONE_STEP_COST,
+                 mpsc_cost_horizon: int = 5,
+                 decay_factor: float = 0.85,
+                 soften_constraints: bool = False,
+                 slack_cost: float = 250,
+                 **kwargs
+                 ):
+        '''Initialize the MPSC.
+
+        Args:
+            env_func (partial BenchmarkEnv): Environment for the task.
+            horizon (int): The MPC horizon.
+            q_mpc (list): The state cost weights.
+            r_mpc (list): The input cost weights.
+            integration_algo (str): The algorithm used for integrating the dynamics,
+                either 'LTI', 'rk4', or a CasADi integrator name such as 'cvodes'.
+            warmstart (bool): If the previous MPC solution should be used to warm-start the next MPC step.
+            additional_constraints (list): List of additional constraints to consider.
+            use_terminal_set (bool): Whether to use a terminal set constraint or not.
+            n_samples (int): The number of state/action pairs to test when determining w_func.
+            cost_function (Cost_Function): A string (from Cost_Function) representing the cost function to be used.
+            mpsc_cost_horizon (int): How many steps forward to check for constraint violations.
+            decay_factor (float): How much to discount future costs.
+            soften_constraints (bool): Whether to soften the constraints with slack variables.
+            slack_cost (float): The cost weight on the slack variables when constraints are softened.
+        '''
+
+        self.model_bias = None
+        super().__init__(env_func, horizon, q_mpc, r_mpc, integration_algo, warmstart, additional_constraints, use_terminal_set, cost_function, mpsc_cost_horizon, decay_factor, **kwargs)
+
+        self.n_samples = n_samples
+        self.soften_constraints = soften_constraints
+        self.slack_cost = slack_cost
+
+        self.n = self.model.nx
+        self.m = self.model.nu
+        self.q = self.model.nx
+
+        self.state_constraint = self.constraints.state_constraints[0]
+        self.input_constraint = self.constraints.input_constraints[0]
+
+        [self.X_mid, L_x, l_x] = box2polytopic(self.state_constraint)
+        [self.U_mid, L_u, l_u] = box2polytopic(self.input_constraint)
+
+        # number of constraints
+        p_x = l_x.shape[0]
+        p_u = l_u.shape[0]
+        self.p = p_x + p_u
+
+        self.L_x = np.vstack((L_x, np.zeros((p_u, self.n))))
+        self.L_u = np.vstack((np.zeros((p_x, self.m)), L_u))
+        self.l_xu = np.concatenate([l_x, l_u])
+
+    def set_dynamics(self):
+        '''Compute the discrete dynamics.'''
+
+        if self.integration_algo == 'LTI':
+            dfdxdfdu = self.model.df_func(x=self.X_EQ, u=self.U_EQ)
+            self.Ac = dfdxdfdu['dfdx'].toarray()
+            self.Bc = dfdxdfdu['dfdu'].toarray()
+
+            delta_x = self.model.x_sym
+            delta_u = self.model.u_sym
+            delta_w = cs.MX.sym('delta_w', self.model.nx, 1)
+
+            self.Ad, self.Bd = discretize_linear_system(self.Ac, self.Bc, self.dt, exact=True)
+
+            x_dot_lin_vec = self.Ad @ delta_x + self.Bd @ delta_u
+
+            if self.model_bias is not None:
+                x_dot_lin_vec = x_dot_lin_vec + self.model_bias
+
+            dynamics_func = cs.Function('fd',
+                                        [delta_x, delta_u],
+                                        [x_dot_lin_vec],
+                                        ['x0', 'p'],
+                                        ['xf'])
+
+            self.Ac = cs.Function('Ac', [delta_x, delta_u, delta_w], [self.Ac], ['x', 'u', 'w'], ['Ac'])
+            self.Bc = cs.Function('Bc', [delta_x, delta_u, delta_w], [self.Bc], ['x', 'u', 'w'], ['Bc'])
+
+            self.Ad = cs.Function('Ad', [delta_x, delta_u, delta_w], [self.Ad], ['x', 'u', 'w'], ['Ad'])
+            self.Bd = cs.Function('Bd', [delta_x, delta_u, delta_w], [self.Bd], ['x', 'u', 'w'], ['Bd'])
+        elif self.integration_algo == 'rk4':
+            dynamics_func = rk_discrete(self.model.fc_func,
+                                        self.model.nx,
+                                        self.model.nu,
+                                        self.dt)
+        else:
+            dynamics_func = cs.integrator('fd', self.integration_algo,
+                                          {'x': self.model.x_sym,
+                                           'p': self.model.u_sym,
+                                           'ode': self.model.x_dot}, {'tf': self.dt}
+                                          )
+
+        self.dynamics_func = dynamics_func
+
+    def learn(self,
+              env=None,
+              **kwargs
+              ):
+        '''Compute values used by the MPC.
+
+        Args:
+            env (BenchmarkEnv): If a different environment is to be used for learning, it can be supplied here.
+        '''
+
+        if env is None:
+            env = self.training_env
+
+        if self.env.NAME == Environment.CARTPOLE:
+            self.x_r = np.array([self.X_EQ[0], 0, 0, 0])
+        elif self.env.NAME == Environment.QUADROTOR and self.env.QUAD_TYPE in [2, 4]:
+            self.x_r = np.array([self.X_EQ[0], 0, self.X_EQ[2], 0, 0, 0])
+        elif self.env.NAME == Environment.QUADROTOR and self.env.QUAD_TYPE == 3:
+            self.x_r = np.array([self.X_EQ[0], 0, self.X_EQ[2], 0, self.X_EQ[4], 0, 0, 0, 0, 0, 0, 0])
+        self.u_r = self.U_EQ
+
+        x_sym = self.model.x_sym
+        u_sym = self.model.u_sym
+        w_sym = cs.MX.sym('delta_w', self.q, 1)
+
+        self.get_error_function(env=env)
+        if self.integration_algo == 'rk4':
+            self.Ec = np.diag(self.max_w_per_dim) / self.dt
+
+            self.f = cs.Function('f', [x_sym, u_sym, w_sym], [self.model.fc_func(x_sym + self.X_mid, u_sym + self.U_mid) + self.Ec @ w_sym], ['x', 'u', 'w'], ['f'])
+            phi_1 = cs.Function('phi_1', [x_sym, u_sym, w_sym], [self.f(x_sym, u_sym, w_sym)], ['x', 'u', 'w'], ['phi_1'])
+            phi_2 = cs.Function('phi_2', [x_sym, u_sym, w_sym], [self.f(x_sym + 0.5 * self.dt * phi_1(x_sym, u_sym, w_sym), u_sym, w_sym)], ['x', 'u', 'w'], ['phi_2'])
+            phi_3 = cs.Function('phi_3', [x_sym, u_sym, w_sym], [self.f(x_sym + 0.5 * self.dt * phi_2(x_sym, u_sym, w_sym), u_sym, w_sym)], ['x', 'u', 'w'], ['phi_3'])
+            phi_4 = cs.Function('phi_4', [x_sym, u_sym, w_sym], [self.f(x_sym + self.dt * phi_3(x_sym, u_sym, w_sym), u_sym, w_sym)], ['x', 'u', 'w'], ['phi_4'])
+            rungeKutta = x_sym + self.dt / 6 * (phi_1(x_sym, u_sym, w_sym) + 2 * phi_2(x_sym, u_sym, w_sym) + 2 * phi_3(x_sym, u_sym, w_sym) + phi_4(x_sym, u_sym, w_sym))
+            self.disc_f = cs.Function('disc_f', [x_sym, u_sym, w_sym], [rungeKutta + self.X_mid], ['x', 'u', 'w'], ['disc_f'])
+
+            self.Ac = cs.Function('Ac', [x_sym, u_sym, w_sym], [cs.jacobian(self.f(x_sym, u_sym, w_sym), x_sym)], ['x', 'u', 'w'], ['Ac'])
+            self.Bc = cs.Function('Bc', [x_sym, u_sym, w_sym], [cs.jacobian(self.f(x_sym, u_sym, w_sym), u_sym)], ['x', 'u', 'w'], ['Bc'])
+
+            self.Ad = cs.Function('Ad', [x_sym, u_sym, w_sym], [cs.jacobian(self.disc_f(x_sym, u_sym, w_sym), x_sym)], ['x', 'u', 'w'], ['Ad'])
+            self.Bd = cs.Function('Bd', [x_sym, u_sym, w_sym], [cs.jacobian(self.disc_f(x_sym, u_sym, w_sym), u_sym)], ['x', 'u', 'w'], ['Bd'])
+        elif self.integration_algo == 'LTI':
+            self.Ed = np.diag(self.max_w_per_dim)
+            self.Ec = self.Ed / self.dt
+
+            self.f = cs.Function('disc_f', [x_sym, u_sym, w_sym], [self.Ac(x_sym, u_sym, w_sym) @ x_sym + self.Bc(x_sym, u_sym, w_sym) @ u_sym + self.Ec @ w_sym], ['x', 'u', 'w'], ['disc_f'])
+            self.disc_f = cs.Function('disc_f', [x_sym, u_sym, w_sym], [self.Ad(x_sym, u_sym, w_sym) @ x_sym + self.Bd(x_sym, u_sym, w_sym) @ u_sym + self.Ed @ w_sym], ['x', 'u', 'w'], ['disc_f'])
+
+        self.synthesize_lyapunov()
+        self.get_terminal_ingredients()
+
+        self.L_x_sym = cs.MX(self.L_x)
+        self.L_u_sym = cs.MX(self.L_u)
+        self.L_size = np.sum(np.abs(self.L_x), axis=1) + np.sum(np.abs(self.L_u), axis=1)
+        self.L_size_sym = cs.MX(self.L_size)
+        self.l_sym = cs.MX(self.l_xu)
+        self.setup_optimizer()
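The phi_1 through phi_4 chain above is plain RK4 with the input and disturbance held constant over the step. A minimal CasADi sketch on a toy 1D system (hypothetical dynamics and step size) shows the same construction.

```python
import casadi as cs

# Toy 1D system x_dot = -x + u + w, discretized with the same four-stage RK4
# chain as in learn() above (hypothetical dynamics, fixed step dt).
dt = 0.02
x = cs.MX.sym('x')
u = cs.MX.sym('u')
w = cs.MX.sym('w')
f = cs.Function('f', [x, u, w], [-x + u + w])

k1 = f(x, u, w)
k2 = f(x + 0.5 * dt * k1, u, w)  # input and disturbance held constant over the step
k3 = f(x + 0.5 * dt * k2, u, w)
k4 = f(x + dt * k3, u, w)
disc_f = cs.Function('disc_f', [x, u, w], [x + dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4)])

# One discrete step from x=1 with u=0, w=0; the exact answer is exp(-dt) ~ 0.9802.
print(float(disc_f(1.0, 0.0, 0.0)))
```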
+    def get_error_function(self, env):
+        '''Computes the maximum disturbance found in the training environment.
+
+        Args:
+            env (BenchmarkEnv): If a different environment is to be used for learning, it can be supplied here.
+        '''
+
+        if env is None:
+            env = self.training_env
+
+        # Create the set of one-step prediction error residuals.
+        w = np.zeros((self.n_samples, self.n))
+        states = np.zeros((self.n_samples, self.n))
+        actions = np.zeros((self.n_samples, self.m))
+
+        # Use uniform sampling of control inputs and states.
+        for i in range(self.n_samples):
+            init_state = env.reset()[0][:self.n]
+            states[i, :] = init_state
+            if self.env.NAME == Environment.QUADROTOR:
+                u = np.random.rand(self.model.nu) / 20 - 1 / 40 + self.U_EQ
+            else:
+                u = env.action_space.sample()  # Will yield a random action within the action space.
+            actions[i, :] = u
+            x_next_obs = env.step(u)[0][:self.n]
+            x_next_estimate = np.squeeze(self.dynamics_func(x0=init_state, p=u)['xf'].toarray())
+            w[i, :] = x_next_obs - x_next_estimate
+
+        print('MEAN ERROR PER DIM:', np.mean(w, axis=0))
+        self.model_bias = np.mean(w, axis=0)
+        # self.model_bias = np.array([-4.938e-07, -3.543e-05, -8.896e-07, -1.934e-04, -1.112e-05, -2.644e-05])
+        self.set_dynamics()
+
+        num_stds = 2
+        w = w - np.mean(w, axis=0)
+        normed_w = np.linalg.norm(w, axis=1)
+        self.max_w_per_dim = np.minimum(np.max(np.abs(w), axis=0), np.mean(np.abs(w), axis=0) + num_stds * np.std(np.abs(w), axis=0))
+        self.max_w = min(np.max(normed_w), np.mean(normed_w) + num_stds * np.std(normed_w))
+        # self.max_w_per_dim = np.array([0.00052966, 0.04261379, 0.00027445, 0.02223242, 0.00361825, 0.01043795])/10.0
+        # self.max_w = 0.04483104869135911/10.0
+
+        print('MAX ERROR:', np.max(normed_w))
+        print('STD ERROR:', np.mean(normed_w) + num_stds * np.std(normed_w))
+        print('MEAN ERROR:', np.mean(normed_w))
+        print('MAX ERROR PER DIM:', np.max(np.abs(w), axis=0))
+        print('STD ERROR PER DIM:', np.mean(np.abs(w), axis=0) + num_stds * np.std(np.abs(w), axis=0))
+        print('TOTAL ERRORS BY CHANNEL:', np.sum(np.abs(w), axis=0))
+
+        # if self.integration_algo == 'LTI':
+        #     degree = 1
+        #     num_stds = 2
+        # else:
+        #     degree = 2
+        #     num_stds = 3
+        # self.error_parameters = get_error_parameters(states, actions, normed_w, degree)
+        # def w_func(state, action):
+        #     input_vec = cs.horzcat(state.T, action.T)
+        #     return error_function(*self.error_parameters, num_stds, input_vec)
+        # self.w_func = w_func
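The bound estimation above clips the worst-case residual by a mean-plus-two-standard-deviations estimate, per dimension and in norm. A small NumPy sketch with synthetic residuals (hypothetical numbers) mirrors the same statistics.

```python
import numpy as np

# Synthetic one-step prediction residuals for a 6-state model (hypothetical values).
rng = np.random.default_rng(1)
w = 0.01 * rng.normal(size=(600, 6)) + 0.002  # residuals with a small constant bias

model_bias = np.mean(w, axis=0)               # absorbed into the nominal model above
w_centered = w - model_bias
normed_w = np.linalg.norm(w_centered, axis=1)

num_stds = 2
# Clip the empirical maximum by mean + 2*std, exactly as get_error_function does.
max_w_per_dim = np.minimum(np.max(np.abs(w_centered), axis=0),
                           np.mean(np.abs(w_centered), axis=0) + num_stds * np.std(np.abs(w_centered), axis=0))
max_w = min(np.max(normed_w), np.mean(normed_w) + num_stds * np.std(normed_w))
print(max_w_per_dim, max_w)
```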
+    def synthesize_lyapunov(self):
+        '''Synthesize the appropriate constants related to the Lyapunov function of the system.'''
+        # Incremental Lyapunov function: Find upper bound for S-procedure variable lambda
+        lamb_lb = None
+        lamb_ub = None
+
+        lamb = 0.008  # initial lambda for the doubling search
+        self.rho_c = 0.192  # tuning parameter that determines how fast the Lyapunov function contracts
+
+        if self.integration_algo == 'LTI':
+            self.Theta = [0]
+        elif self.env.NAME == Environment.CARTPOLE or (self.env.NAME == Environment.QUADROTOR and self.env.QUAD_TYPE in [2, 4]):
+            self.Theta = [self.state_constraint.lower_bounds[-2], 0, self.state_constraint.upper_bounds[-2]]
+        else:
+            self.Theta = [self.state_constraint.lower_bounds[6], 0, self.state_constraint.upper_bounds[6]]
+
+        while lamb < 100:
+            lamb = lamb * 2
+            [X, Y, cost, constraints] = self.setup_tube_optimization(lamb)
+            prob = cp.Problem(cp.Minimize(cost), constraints)
+            try:
+                print(f'Attempting with lambda={lamb}.')
+                cost = prob.solve(solver=cp.MOSEK, verbose=False)
+                if prob.status == 'optimal' and cost != float('inf'):
+                    print(f'Succeeded with cost={cost}.')
+                    if lamb_lb is None:
+                        lamb_lb = lamb
+                    lamb_ub = lamb
+                else:
+                    raise Exception('Not optimal or cost is infinite.')
+            except Exception as e:
+                print('Error in optimization:', e)
+                if lamb_lb is not None:
+                    break
+
+        # Incremental Lyapunov function: Determine optimal lambda
+        lamb_lb = lamb_lb / 2
+        lamb_ub = lamb_ub * 2
+
+        num_candidates = 50
+
+        lambda_candidates = np.logspace(np.log(lamb_lb) / np.log(10), np.log(lamb_ub) / np.log(10), num_candidates)
+        cost_values = []
+
+        for i in range(num_candidates):
+            lambda_candidate = lambda_candidates[i]
+            [X, Y, cost, constraints] = self.setup_tube_optimization(lambda_candidate)
+            prob = cp.Problem(cp.Minimize(cost), constraints)
+            try:
+                cost = prob.solve(solver=cp.MOSEK, verbose=False)
+                if prob.status != 'optimal' or cost == float('inf'):
+                    raise cp.SolverError
+            except Exception as e:
+                print('Error in optimization:', e)
+                cost = float('inf')
+            cost_values += [cost]
+
+        best_index = cost_values.index(min(cost_values))
+        best_lamb = lambda_candidates[best_index]
+        [X, Y, cost, constraints] = self.setup_tube_optimization(best_lamb)
+        prob = cp.Problem(cp.Minimize(cost), constraints)
+        cost = prob.solve(solver=cp.MOSEK, verbose=False)
+        if prob.status != 'optimal' or cost == float('inf'):
+            raise cp.SolverError
+
+        # Resulting continuous-time parameters
+        self.X = X.value
+        self.P = np.linalg.pinv(self.X)
+        self.K = Y.value @ self.P
+
+        self.c_js = np.zeros(self.p)
+
+        for j in range(self.p):
+            self.c_js[j] = np.linalg.norm((self.L_x[j, :] + self.L_u[j, :] @ self.K) @ sqrtm(self.X))
+
+        c_max = max(self.c_js)
+        w_bar_c = np.sqrt(np.max(np.linalg.eig(self.Ec.T @ self.P @ self.Ec)[0]))
+
+        # Get discrete-time system values
+        self.rho = np.exp(-self.rho_c * self.dt)
+        self.w_bar = w_bar_c * (1 - self.rho) / self.rho_c  # even using rho_c from the paper yields a different w_bar
+        # self.w_bar = max(self.w_bar, self.max_w)
+        horizon_multiplier = (1 - self.rho**self.horizon) / (1 - self.rho)
+        self.s_bar_f = horizon_multiplier * self.w_bar
+        # assert self.s_bar_f > self.max_w * horizon_multiplier, f'[ERROR] s_bar_f ({self.s_bar_f}) is too small with respect to max_w ({self.max_w}).'
+        # assert self.max_w * horizon_multiplier < 1.0, '[ERROR] max_w is too large and will overwhelm terminal set.'
+        self.gamma = 1 / c_max - self.s_bar_f
+
+        self.delta_loc = (horizon_multiplier * self.w_bar)**2
+
+        print(f'rho: {self.rho}')
+        print(f'w_bar: {self.w_bar}')
+        print(f's_bar_f: {self.s_bar_f}')
+        print(f'gamma: {self.gamma}')
+
+        self.check_decay_rate()
+        self.check_lyapunov_func()
+
+    def get_terminal_ingredients(self):
+        '''Calculate the terminal ingredients of the MPC optimization.'''
+        # Solve the discrete-time ARE for the linearized (RK4-discretized) dynamics to obtain the terminal ingredients
+        w_none = np.zeros((self.q, 1))
+        A_lin = self.Ad(self.x_r - self.X_mid, self.u_r - self.U_mid, w_none).toarray()
+        B_lin = self.Bd(self.x_r - self.X_mid, self.u_r - self.U_mid, w_none).toarray()
+
+        self.P_f = solve_discrete_are(A_lin, B_lin, self.Q, self.R)
+        btp = np.dot(B_lin.T, self.P_f)
+        self.K_f = -np.dot(np.linalg.inv(self.R + np.dot(btp, B_lin)), np.dot(btp, A_lin))
+        # self.check_terminal_ingredients()
+        # self.check_terminal_constraints()
+
+        if self.integration_algo == 'LTI' and self.use_terminal_set:
+            self.get_terminal_constraint()
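The terminal ingredients come from a standard discrete-time algebraic Riccati equation. A toy double-integrator sketch (hypothetical A, B, Q, R) reproduces the P_f/K_f computation above and verifies that the resulting terminal controller is stabilizing.

```python
import numpy as np
from scipy.linalg import solve_discrete_are

# Toy double-integrator discretization (hypothetical matrices), mirroring
# the P_f / K_f computation in get_terminal_ingredients above.
dt = 0.02
A = np.array([[1.0, dt], [0.0, 1.0]])
B = np.array([[0.0], [dt]])
Q = np.diag([18.0, 0.1])
R = np.array([[3.0]])

P_f = solve_discrete_are(A, B, Q, R)
btp = B.T @ P_f
K_f = -np.linalg.inv(R + btp @ B) @ (btp @ A)

# The terminal controller u = K_f @ x must stabilize the linearized system:
eigvals = np.linalg.eigvals(A + B @ K_f)
assert np.all(np.abs(eigvals) < 1.0)
print('closed-loop eigenvalues:', eigvals)
```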
+    def setup_tube_optimization(self, lamb):
+        '''Sets up the optimization to find the Lyapunov function.
+
+        Args:
+            lamb (float): The S-procedure constant.
+
+        Returns:
+            X (cp.Variable): The X variable in the optimization.
+            Y (cp.Variable): The Y variable in the optimization.
+            Cost (cp.Expression): The cost function expression.
+            Constraints (list): The list of cvxpy expressions representing the constraints.
+        '''
+
+        X = cp.Variable((self.n, self.n), PSD=True, name='X', complex=False)
+        Y = cp.Variable((self.m, self.n), name='Y', complex=False)
+
+        Cost = -cp.log_det(X)
+
+        Constraints = []
+
+        x_test = np.zeros((self.n, 1))
+        u_test = self.U_EQ
+        w_test = np.zeros((self.q, 1))
+
+        for angle in self.Theta:
+            if self.env.NAME == Environment.CARTPOLE or (self.env.NAME == Environment.QUADROTOR and self.env.QUAD_TYPE in [2, 4]):
+                x_test[-2] = angle
+            else:
+                x_test[-4] = angle
+                x_test[-5] = angle
+                x_test[-6] = angle
+            A_theta = self.Ac(x_test, u_test - self.U_mid, w_test).toarray()
+            B_theta = self.Bc(x_test, u_test - self.U_mid, w_test).toarray()
+
+            AXBY = A_theta @ X + B_theta @ Y
+
+            constraint_1 = AXBY + AXBY.T + 2 * self.rho_c * X
+            constraint_2 = cp.bmat([[AXBY + AXBY.T + lamb * X, self.Ec], [self.Ec.T, -lamb * np.eye(self.q)]])
+
+            Constraints += [constraint_1 << 0]
+            Constraints += [constraint_2 << 0]
+
+        for j in range(0, self.p):
+            LXLY = self.L_x[j:j + 1, :] @ X + self.L_u[j:j + 1, :] @ Y
+
+            constraint_3 = cp.bmat([[np.array([[1]]), LXLY], [LXLY.T, X]])
+            Constraints += [constraint_3 >> 0]
+
+        return X, Y, Cost, Constraints
+
+    def randsphere(self, num, dim, r):
+        '''Returns a num by dim array in which each of the num rows holds the dim
+        Cartesian coordinates of a random point uniformly distributed over the
+        interior of a dim-dimensional hypersphere with radius r centered at the origin.
+
+        Args:
+            num (int): The number of vectors.
+            dim (int): The dimension of the hypersphere.
+            r (float): The radius of the hypersphere.
+
+        Returns:
+            vectors (ndarray): The resulting random points inside the hypersphere.
+        '''
+
+        vectors = []
+
+        while len(vectors) < num:
+            u = np.random.normal(0, 1, dim)  # an array of dim normally distributed random variables
+            norm = np.sum(u**2)**(0.5)
+            radius = r * np.random.rand()**(1.0 / dim)
+            vec = radius * u / norm
+            vectors.append(vec)
+
+        return np.vstack(vectors)
+
+    def check_decay_rate(self):
+        '''Check the decay rate.'''
+
+        x_test = np.zeros((self.n, 1))
+        u_test = self.U_EQ
+        w_test = np.zeros((self.q, 1))
+
+        X_sqrt = sqrtm(self.X)
+        P_sqrt = sqrtm(self.P)
+        for angle in self.Theta:
+            if self.env.NAME == Environment.CARTPOLE or (self.env.NAME == Environment.QUADROTOR and self.env.QUAD_TYPE in [2, 4]):
+                x_test[-2] = angle
+            else:
+                x_test[-4] = angle
+                x_test[-5] = angle
+                x_test[-6] = angle
+            A_theta = self.Ac(x_test, u_test - self.U_mid, w_test).toarray()
+            B_theta = self.Bc(x_test, u_test - self.U_mid, w_test).toarray()
+            left_side = max(np.linalg.eig(X_sqrt @ (A_theta + B_theta @ self.K).T @ P_sqrt + P_sqrt @ (A_theta + B_theta @ self.K) @ X_sqrt)[0]) + 2 * self.rho_c
+            assert left_side <= 0.0001, f'[ERROR] The solution {left_side} is not within the tolerance {0.0001}'
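randsphere draws uniform samples from a ball by scaling isotropic directions with a radius whose CDF is (radius/r)**dim. A standalone sketch checks both containment and the radial distribution.

```python
import numpy as np

# Standalone version of randsphere above: num points uniform in a dim-ball of radius r.
def randsphere(num, dim, r, rng=np.random.default_rng(2)):
    vectors = []
    while len(vectors) < num:
        u = rng.normal(0, 1, dim)                # isotropic direction
        norm = np.sum(u**2)**0.5
        radius = r * rng.random()**(1.0 / dim)   # radius CDF ~ (radius/r)**dim for uniformity
        vectors.append(radius * u / norm)
    return np.vstack(vectors)

pts = randsphere(1000, 3, r=0.5)
assert pts.shape == (1000, 3)
assert np.all(np.linalg.norm(pts, axis=1) <= 0.5)  # every sample lies inside the ball
# Roughly half the points should fall inside radius 0.5 * 0.5**(1/3) ~ 0.397.
print(np.mean(np.linalg.norm(pts, axis=1) <= 0.5 * 0.5**(1 / 3)))
```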
+    def check_lyapunov_func(self):
+        '''Check the incremental Lyapunov function.'''
+
+        # select the number of random vectors to check
+        num_random_vectors = 10000
+
+        # Sample random points inside the set V_delta(x, z) <= delta_loc
+        delta_x = self.randsphere(num_random_vectors, self.n, self.delta_loc).T
+        dx_transform = np.linalg.inv(sqrtm(self.P)) @ delta_x
+        dx_transform = self.x_r[:, None] + dx_transform  # transform point from error to actual state
+
+        # sample random disturbance bounded by max_w
+        w_dist = self.randsphere(num_random_vectors, self.q, self.max_w).T
+
+        # set arbitrary v that satisfies the constraints for testing
+        v = np.array(self.constraints.input_constraints[0].upper_bounds) / 10
+
+        # initialize counters
+        num_valid = 0
+        inside_set = 0
+        is_invariant = 0
+
+        for i in range(num_random_vectors):
+            # get random state
+            x_i = dx_transform[:, i]
+
+            # set up control inputs (u_r is required to get f_kappa(0, 0) = 0)
+            u_x = self.K @ x_i + v + self.u_r
+            u_z = self.K @ self.x_r + v + self.u_r
+
+            # get dynamics
+            w_none = np.zeros((self.q, 1))
+            x_dot = np.squeeze(self.f(x_i - self.X_mid, u_x - self.U_mid, w_none).toarray())
+            z_dot = np.squeeze(self.f(self.x_r - self.X_mid, u_z - self.U_mid, w_none).toarray())
+
+            # evaluate Lyapunov function and its time derivative
+            V_d = (x_i - self.x_r).T @ self.P @ (x_i - self.x_r)
+            dVdt = (x_i - self.x_r).T @ self.P @ (x_dot - z_dot)
+
+            # Check incremental Lyapunov function condition
+            if dVdt <= -self.rho_c * V_d:
+                num_valid += 1
+
+            # check if states are inside V_d(x_i, z) <= delta_loc
+            if V_d <= self.delta_loc:
+                inside_set += 1
+
+            # get next state
+            x_plus = np.squeeze(self.disc_f(x_i - self.X_mid, u_x - self.U_mid, w_dist[:, i]).toarray())
+            V_d_plus = (x_plus - self.x_r).T @ self.P @ (x_plus - self.x_r)
+
+            # check robust control invariance
+            if V_d_plus <= self.delta_loc:
+                is_invariant += 1
+
+        print('NUM_VALID:', num_valid / num_random_vectors)
+        print('INSIDE SET:', inside_set / num_random_vectors)
+        print('IS INVARIANT:', is_invariant / num_random_vectors)
+
+    def check_terminal_ingredients(self):
+        '''Check the terminal ingredients.'''
+
+        w_none = np.zeros((self.q, 1))
+        num_random_vectors = 10000
+
+        # Sample points from gamma^2 * unit sphere
+        delta_x = self.randsphere(num_random_vectors, self.n, self.gamma**2).T
+
+        # Transform sampled points into ellipsoid to span the candidate terminal
+        # set and shift around reference point x_r
+        dx_transform = np.linalg.inv(sqrtm(self.P_f)) @ delta_x
+        dx_transform = self.x_r[:, None] + dx_transform
+
+        # sample random disturbance bounded by max_w
+        w_dist = self.randsphere(num_random_vectors, self.q, self.max_w).T
+
+        # initialize counter
+        num_valid = 0
+        inside_set = 0
+
+        for i in range(num_random_vectors):
+            # get sampled vector
+            x_i = dx_transform[:, i]
+
+            # get terminal control input
+            u = self.K_f @ (x_i - self.x_r) + self.u_r
+
+            # simulate system using control input
+            x_plus = np.squeeze(self.disc_f(x_i - self.X_mid, u - self.U_mid, w_none).toarray())
+
+            # disturbed x_plus
+            x_plus_noisy = np.squeeze(self.disc_f(x_i - self.X_mid, u - self.U_mid, w_dist[:, i]).toarray())
+
+            # evaluate stage cost and terminal costs
+            stage = (x_i - self.x_r).T @ self.Q @ (x_i - self.x_r)
+            V_f = (x_i - self.x_r).T @ self.P_f @ (x_i - self.x_r)
+            V_f_plus = (x_plus - self.x_r).T @ self.P_f @ (x_plus - self.x_r)
+
+            # check Lyapunov condition for terminal cost
+            if V_f_plus <= V_f - stage:
+                num_valid += 1
+
+            # check if noisy state is still in terminal set
+            V_f_plus_noisy = (x_plus_noisy - self.x_r).T @ self.P_f @ (x_plus_noisy - self.x_r)
+            if V_f_plus_noisy <= self.gamma**2:
+                inside_set += 1
+
+        print('NUM_VALID:', num_valid / num_random_vectors)
+        print('INSIDE SET:', inside_set / num_random_vectors)
+
+    def check_terminal_constraints(self,
+                                   num_points: int = 40,
+                                   ):
+        '''Check that the provided terminal set contains only valid states, using a gridded approach.
+
+        Args:
+            num_points (int): The number of points in each dimension to check.
+
+        Raises:
+            ValueError: If the terminal set is not contained within the state constraints, or if the
+                terminal controller produces inputs that violate the input constraints.
+ ''' + + # Determine if terminal set inside state constraints + terminal_max = np.sqrt(np.diag(np.linalg.inv(self.P_f / self.gamma**2))) + terminal_min = -np.sqrt(np.diag(np.linalg.inv(self.P_f / self.gamma**2))) + + max_bounds = np.zeros((self.n)) + min_bounds = np.zeros((self.n)) + for i in range(self.n): + tighten_by_max = self.c_js[i * 2] * self.s_bar_f + tighten_by_min = self.c_js[i * 2 + 1] * self.s_bar_f + max_bounds[i] = 1.0 / self.L_x[i * 2, i] * (self.l_xu[i * 2] - tighten_by_max) + min_bounds[i] = 1.0 / self.L_x[i * 2 + 1, i] * (self.l_xu[i * 2 + 1] - tighten_by_min) + + if np.any(terminal_max > max_bounds) or np.any(terminal_min < min_bounds): + raise ValueError('Terminal set is not constrained within the constraint set.') + + # Determine if the maximum input is within input constraints + x = cp.Variable((self.n, 1)) + C = np.linalg.cholesky(self.P_f).T + cost = cp.Maximize(self.K_f[0, :] @ x) + constraint = [cp.norm(C @ x) <= self.gamma] + prob = cp.Problem(cost, constraint) + max_input = prob.solve(solver=cp.MOSEK) + + max_bounds = np.zeros((self.m)) + min_bounds = np.zeros((self.m)) + for i in range(self.m): + tighten_by_max = self.c_js[self.n * 2 + i * 2] * self.s_bar_f + tighten_by_min = self.c_js[self.n * 2 + i * 2 + 1] * self.s_bar_f + max_bounds[i] = 1.0 / self.L_u[self.n * 2 + i * 2, i] * (self.l_xu[self.n * 2 + i * 2] - tighten_by_max) + min_bounds[i] = 1.0 / self.L_u[self.n * 2 + i * 2 + 1, i] * (self.l_xu[self.n * 2 + i * 2 + 1] - tighten_by_min) + + if np.any(max_input + self.u_r > max_bounds + self.U_mid) or np.any(-max_input + self.u_r < min_bounds + self.U_mid): + raise ValueError(f'Terminal controller causes inputs (max_input: {-max_input+self.u_r[0]}/{max_input+self.u_r[0]}) outside of input constraints (constraints: {min_bounds[0] + self.U_mid[0]}/{max_bounds[0] + self.U_mid[0]}).') + + # Make sure that every vertex is checked + num_points = max(2 * self.n, num_points + num_points % (2 * self.n)) + num_points_per_dim = num_points // self.n + + # Create the lists of states to check + states_to_sample = [np.linspace(self.X_mid[i], terminal_max[i] + self.X_mid[i], num_points_per_dim) for i in range(self.n)] + states_to_check = cartesian_product(*states_to_sample) + + num_states_inside_set = 0 + failed_checks = 0 + failed_29a = 0 + failed_29b = 0 + failed_29d = 0 + + for state in states_to_check: + terminal_cost = (state - self.X_mid).T @ self.P_f @ (state - self.X_mid) + in_terminal_set = terminal_cost < self.gamma**2 + + if in_terminal_set: + num_states_inside_set += 1 + failed = False + + # Testing condition 29a + stable_input = self.K_f @ (state - self.x_r) + self.u_r + next_state = np.squeeze(self.disc_f(state - self.X_mid, stable_input - self.U_mid, np.zeros((self.q, 1))).toarray()) + stage_cost = (state.T - self.X_mid) @ self.Q @ (state - self.X_mid) + next_terminal_cost = (next_state - self.X_mid).T @ self.P_f @ (next_state - self.X_mid) + + if terminal_cost - stage_cost != 0 and next_terminal_cost / (terminal_cost - stage_cost) > 1.01: + failed_29a += 1 + failed = True + + # Testing condition 29b + num_disturbances = 100 + disturbances = self.randsphere(num_disturbances, self.n, self.max_w).T + for w in range(num_disturbances): + disturbed_state = next_state + disturbances[:, w] + terminal_cost = (disturbed_state - self.X_mid).T @ self.P_f @ (disturbed_state - self.X_mid) + in_terminal_set = terminal_cost < self.gamma**2 + + if not in_terminal_set: + failed_29b += 1 + failed = True + break + + # Testing condition 29d + for j in range(self.p): + 
constraint_satisfaction = self.L_x[j, :] @ (state - self.X_mid) + self.L_u[j, :] @ (stable_input - self.U_mid) - self.l_xu[j] + self.c_js[j] * self.s_bar_f <= 0 + if not constraint_satisfaction: + failed_29d += 1 + failed = True + break + + if failed: + failed_checks += 1 + + print(f'Number of states checked: {len(states_to_check)}') + print(f'Number of states inside terminal set: {num_states_inside_set}') + print(f'Number of checks failed: {failed_checks}') + print(f'Number of checks failed due to 29a: {failed_29a}') + print(f'Number of checks failed due to 29b: {failed_29b}') + print(f'Number of checks failed due to 29d: {failed_29d}') + + def get_terminal_constraint(self): + '''Calculates the terminal set as a linear constraint''' + interior_points = [] + for _ in range(self.n_samples): + state = np.random.uniform(self.state_constraint.lower_bounds, self.state_constraint.upper_bounds) + if (state - self.X_mid).T @ self.P_f @ (state - self.X_mid) <= self.gamma**2: + interior_points.append(state) + self.terminal_set = Polytope(interior_points) + self.terminal_set.minimize_V_rep() + + self.terminal_A = self.terminal_set.A + self.terminal_b = self.terminal_set.b + + def load(self, + path, + ): + '''Load values used by the MPSC. + + Args: + path (str): Path to the required file. + ''' + + with open(path, 'rb') as f: + parameters = pickle.load(f) + + self.rho_c = parameters['rho_c'] + self.Theta = parameters['Theta'] + self.X = parameters['X'] + self.K = parameters['K'] + self.P = parameters['P'] + self.delta_loc = parameters['delta_loc'] + self.rho = parameters['rho'] + self.s_bar_f = parameters['s_bar_f'] + self.w_bar = parameters['w_bar'] + self.max_w = parameters['max_w'] + # self.error_parameters = parameters['error_parameters'] + # if self.integration_algo == 'LTI': + # num_stds = 2 + # else: + # num_stds = 3 + # def w_func(state, action): + # input_vec = cs.horzcat(state.T, action.T) + # return error_function(*self.error_parameters, num_stds, input_vec) + # self.w_func = w_func + self.c_js = parameters['c_js'] + self.gamma = parameters['gamma'] + self.P_f = parameters['P_f'] + self.K_f = parameters['K_f'] + self.model_bias = parameters['model_bias'] + + self.set_dynamics() + + if self.integration_algo == 'LTI' and self.use_terminal_set is True: + self.terminal_A = parameters['terminal_A'] + self.terminal_b = parameters['terminal_b'] + + self.L_x_sym = cs.MX(self.L_x) + self.L_u_sym = cs.MX(self.L_u) + self.L_size = np.sum(np.abs(self.L_x), axis=1) + np.sum(np.abs(self.L_u), axis=1) + self.L_size_sym = cs.MX(self.L_size) + self.l_sym = cs.MX(self.l_xu) + + self.setup_optimizer() + + def save(self, path): + '''Save values used by the MPSC. + + Args: + path (str): Name of the file to be created. 
+ ''' + + parameters = {} + parameters['rho_c'] = self.rho_c + parameters['Theta'] = self.Theta + parameters['X'] = self.X + parameters['K'] = self.K + parameters['P'] = self.P + parameters['delta_loc'] = self.delta_loc + parameters['rho'] = self.rho + parameters['s_bar_f'] = self.s_bar_f + parameters['w_bar'] = self.w_bar + parameters['max_w'] = self.max_w + # parameters['error_parameters'] = self.error_parameters + parameters['c_js'] = self.c_js + parameters['gamma'] = self.gamma + parameters['P_f'] = self.P_f + parameters['K_f'] = self.K_f + parameters['model_bias'] = self.model_bias + + if self.integration_algo == 'LTI' and self.use_terminal_set is True: + parameters['terminal_A'] = self.terminal_A + parameters['terminal_b'] = self.terminal_b + + with open(path, 'wb') as f: + pickle.dump(parameters, f) + + def setup_casadi_optimizer(self): + '''Setup the certifying MPC problem.''' + + # Horizon parameter. + horizon = self.horizon + nx, nu = self.model.nx, self.model.nu + # Define optimizer and variables. + if self.integration_algo == 'LTI': + opti = cs.Opti('conic') + elif self.integration_algo == 'rk4': + opti = cs.Opti() + # States. + z_var = opti.variable(nx, horizon + 1) + # Inputs. + v_var = opti.variable(nu, horizon) + # Lyapunov bound. + s_var = opti.variable(1, horizon + 1) + # Certified input. + next_u = opti.variable(nu, 1) + # Desired input. + u_L = opti.parameter(nu, 1) + # Current observed state. + x_init = opti.parameter(nx, 1) + # Reference trajectory and predicted LQR gains + if self.env.TASK == Task.STABILIZATION: + X_GOAL = opti.parameter(1, nx) + elif self.env.TASK == Task.TRAJ_TRACKING: + X_GOAL = opti.parameter(self.horizon, nx) + + if self.soften_constraints: + slack = opti.variable(1, 1) + slack_term = opti.variable(1, 1) + else: + slack = opti.variable(1, 1) + slack_term = opti.variable(1, 1) + opti.subject_to(slack == 0) + opti.subject_to(slack_term == 0) + + for i in range(self.horizon): + # Dynamics constraints + next_state = self.dynamics_func(x0=z_var[:, i], p=v_var[:, i])['xf'] + opti.subject_to(z_var[:, i + 1] == next_state) + + # Lyapunov size increase + opti.subject_to(s_var[:, i + 1] == self.rho * s_var[:, i] + self.max_w) # self.w_func(z_var[:, i], v_var[:, i])) + opti.subject_to(s_var[:, i] <= self.s_bar_f) + # opti.subject_to(self.w_func(z_var[:, i], v_var[:, i]) <= self.max_w) + + # Constraints + for j in range(self.p): + tighten_by = self.c_js[j] * s_var[:, i + 1] + if self.soften_constraints: + opti.subject_to(self.L_x_sym[j, :] @ (z_var[:, i + 1] - self.X_mid) + self.L_u_sym[j, :] @ (v_var[:, i] - self.U_mid) - self.l_sym[j] + tighten_by <= slack) + opti.subject_to(slack >= 0) + else: + opti.subject_to(self.L_x_sym[j, :] @ (z_var[:, i + 1] - self.X_mid) + self.L_u_sym[j, :] @ (v_var[:, i] - self.U_mid) - self.l_sym[j] + tighten_by <= 0) + + # Final state constraints + if self.use_terminal_set: + if self.integration_algo == 'LTI': + if self.soften_constraints: + opti.subject_to(cs.vec(self.terminal_A @ (z_var[:, -1] - self.X_mid) - self.terminal_b) <= slack_term) + opti.subject_to(slack_term >= 0) + else: + opti.subject_to(cs.vec(self.terminal_A @ (z_var[:, -1] - self.X_mid) - self.terminal_b) <= 0) + elif self.integration_algo == 'rk4': + terminal_cost = (z_var[:, -1] - self.X_mid).T @ self.P_f @ (z_var[:, -1] - self.X_mid) + if self.soften_constraints: + opti.subject_to(terminal_cost <= self.gamma**2 + slack_term) + opti.subject_to(slack_term >= 0) + else: + opti.subject_to(terminal_cost <= self.gamma**2) + else: + if self.soften_constraints: + 
opti.subject_to(slack_term == 0)
+
+        # Initial state constraints
+        opti.subject_to(z_var[:, 0] == x_init)
+        opti.subject_to(s_var[:, 0] == 0)
+
+        # Real input
+        opti.subject_to(next_u == v_var[:, 0])
+
+        # Create solver (qpOASES for the LTI case, IPOPT otherwise).
+        if self.integration_algo == 'LTI':
+            opts = {'expand': True, 'printLevel': 'none'}
+            opti.solver('qpoases', opts)
+        elif self.integration_algo == 'rk4':
+            opts = {'expand': True,
+                    'ipopt.print_level': 0,
+                    'ipopt.sb': 'yes',
+                    'ipopt.max_iter': 50,
+                    'print_time': 0}
+            opti.solver('ipopt', opts)
+        self.opti_dict = {
+            'opti': opti,
+            'z_var': z_var,
+            'v_var': v_var,
+            's_var': s_var,
+            'u_L': u_L,
+            'x_init': x_init,
+            'next_u': next_u,
+            'X_GOAL': X_GOAL,
+            'slack': slack,
+            'slack_term': slack_term,
+        }
+
+        # Cost (eqn 5.a; note: using the 2-norm or a sqrt makes this infeasible).
+        cost = self.cost_function.get_cost(self.opti_dict)
+        if self.soften_constraints:
+            cost = cost + self.slack_cost * slack
+            cost = cost + self.slack_cost * slack_term
+        opti.minimize(cost)
+        self.opti_dict['cost'] = cost
+
+    def setup_acados_optimizer(self):
+        '''Sets up the certifying MPC problem using acados.'''
+        # create ocp object to formulate the OCP
+        ocp = AcadosOcp()
+
+        # Setup model
+        model = AcadosModel()
+        model.x = self.model.x_sym
+        model.u = self.model.u_sym
+        model.f_expl_expr = self.model.x_dot
+
+        if self.env.NAME == Environment.CARTPOLE:
+            x1_dot = cs.MX.sym('x1_dot')
+            v_dot = cs.MX.sym('v_dot')
+            theta1_dot = cs.MX.sym('theta1_dot')
+            dtheta_dot = cs.MX.sym('dtheta_dot')
+            xdot = cs.vertcat(x1_dot, v_dot, theta1_dot, dtheta_dot)
+        elif self.env.NAME == Environment.QUADROTOR and self.env.QUAD_TYPE in [2, 4]:
+            x1_dot = cs.MX.sym('x1_dot')
+            vx_dot = cs.MX.sym('vx_dot')
+            z1_dot = cs.MX.sym('z1_dot')
+            vz_dot = cs.MX.sym('vz_dot')
+            theta1_dot = cs.MX.sym('theta1_dot')
+            dtheta_dot = cs.MX.sym('dtheta_dot')
+            xdot = cs.vertcat(x1_dot, vx_dot, z1_dot, vz_dot, theta1_dot, dtheta_dot)
+        else:
+            x1_dot = cs.MX.sym('x1_dot')
+            vx_dot = cs.MX.sym('vx_dot')
+            y1_dot = cs.MX.sym('y1_dot')
+            vy_dot = cs.MX.sym('vy_dot')
+            z1_dot = cs.MX.sym('z1_dot')
+            vz_dot = cs.MX.sym('vz_dot')
+            phi1_dot = cs.MX.sym('phi1_dot')  # Roll
+            theta1_dot = cs.MX.sym('theta1_dot')  # Pitch
+            psi1_dot = cs.MX.sym('psi1_dot')  # Yaw
+            p1_body_dot = cs.MX.sym('p1_body_dot')  # Body frame roll rate
+            q1_body_dot = cs.MX.sym('q1_body_dot')  # Body frame pitch rate
+            r1_body_dot = cs.MX.sym('r1_body_dot')  # Body frame yaw rate
+            xdot = cs.vertcat(x1_dot, vx_dot, y1_dot, vy_dot, z1_dot, vz_dot, phi1_dot, theta1_dot, psi1_dot, p1_body_dot, q1_body_dot, r1_body_dot)
+
+        model.xdot = xdot
+        model.f_impl_expr = model.xdot - model.f_expl_expr
+        model.name = 'mpsf'
+        ocp.model = model
+
+        nx, nu = self.model.nx, self.model.nu
+        ny = nx + nu
+
+        ocp.dims.N = self.horizon
+
+        # set cost module
+        ocp.cost.cost_type = 'LINEAR_LS'
+        ocp.cost.cost_type_e = 'LINEAR_LS'
+
+        Q_mat = np.zeros((nx, nx))
+        ocp.cost.W_e = np.zeros((nx, nx))
+        R_mat = np.eye(nu)
+        ocp.cost.W = block_diag(Q_mat, R_mat)
+
+        ocp.cost.Vx = np.zeros((ny, nx))
+        ocp.cost.Vu = np.zeros((ny, nu))
+        ocp.cost.Vu[nx:nx + nu, :] = np.eye(nu)
+        ocp.cost.Vx_e = np.eye(nx)
+
+        # Updated on each iteration
+        ocp.cost.yref = np.concatenate((self.model.X_EQ, self.model.U_EQ))
+        ocp.cost.yref_e = self.model.X_EQ
+
+        # set constraints
+        ocp.constraints.constr_type = 'BGH'
+        ocp.constraints.constr_type_e = 'BGH'
+
+        ocp.constraints.x0 = self.model.X_EQ
+        ocp.constraints.C = self.L_x
+        ocp.constraints.D = self.L_u
+        ocp.constraints.lg = -1000 * np.ones((self.p))
ocp.constraints.ug = np.zeros((self.p)) + + # Slack + if self.soften_constraints: + ocp.constraints.Jsg = np.eye(self.p) + ocp.cost.Zu = self.slack_cost * np.ones(self.p) + ocp.cost.Zl = self.slack_cost * np.ones(self.p) + ocp.cost.zl = self.slack_cost * np.ones(self.p) + ocp.cost.zu = self.slack_cost * np.ones(self.p) + + # Options + ocp.solver_options.qp_solver = 'FULL_CONDENSING_HPIPM' + ocp.solver_options.hessian_approx = 'GAUSS_NEWTON' + ocp.solver_options.hpipm_mode = 'BALANCE' + ocp.solver_options.integrator_type = 'ERK' + ocp.solver_options.nlp_solver_type = 'SQP_RTI' + + # set prediction horizon + ocp.solver_options.tf = self.dt * self.horizon + + solver_json = 'acados_ocp_mpsf.json' + ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json, generate=False, build=False) + + for stage in range(self.mpsc_cost_horizon): + ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W) + + for stage in range(self.mpsc_cost_horizon, self.horizon): + ocp_solver.cost_set(stage, 'W', 0 * ocp.cost.W) + + s_var = np.zeros((self.horizon + 1)) + g = np.zeros((self.horizon, self.p)) + + for i in range(self.horizon): + s_var[i + 1] = self.rho * s_var[i] + self.max_w + for j in range(self.p): + tighten_by = self.c_js[j] * s_var[i + 1] + g[i, j] = (self.l_xu[j] - tighten_by) + g[i, :] += (self.L_x @ self.X_mid) + (self.L_u @ self.U_mid) + ocp_solver.constraints_set(i, 'ug', g[i, :]) + + self.ocp_solver = ocp_solver
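For reference, the tightening loop at the end reduces to a scalar tube recursion: the tube size follows s_{k+1} = rho * s_k + w_bar with s_0 = 0, and each polytopic bound l_j is shrunk by c_j * s_{k+1} at stage k. A standalone sketch with hypothetical constants (and the X_mid/U_mid offset omitted) shows how the bounds shrink toward their limit.

```python
import numpy as np

# Hypothetical tube constants; in nl_mpsc these come from synthesize_lyapunov.
rho, w_bar = 0.96, 0.004
horizon = 10
c_js = np.array([0.8, 0.8, 1.2, 1.2])  # constraint-specific tightening factors
l_xu = np.ones(4)                      # normalized polytopic bounds

s = np.zeros(horizon + 1)
g = np.zeros((horizon, len(l_xu)))
for i in range(horizon):
    s[i + 1] = rho * s[i] + w_bar      # tube growth, as in the loop above
    g[i, :] = l_xu - c_js * s[i + 1]   # tightened bound at stage i

# The tube size converges to w_bar / (1 - rho); the bounds tighten monotonically.
print(s[-1], w_bar / (1 - rho))
print(g[0, :], g[-1, :])
```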