diff --git a/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml b/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml new file mode 100644 index 000000000..50bf75c92 --- /dev/null +++ b/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml @@ -0,0 +1,32 @@ +safety_filter: nl_mpsc +sf_config: + # LQR controller parameters + q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001] + r_mpc: [3., 3.] + + prior_info: + prior_prop: + beta_1: 18.11298 + beta_2: 3.6800 + beta_3: 0 + alpha_1: -140.8 + alpha_2: -13.4 + alpha_3: 124.8 + randomize_prior_prop: False + prior_prop_rand_info: null + + # MPC Parameters + use_acados: True + horizon: 25 + warmstart: True + integration_algo: rk4 + use_terminal_set: False + + # Cost function + cost_function: one_step_cost + mpsc_cost_horizon: 5 + decay_factor: 0.85 + + # Softening + soften_constraints: True + slack_cost: 250.0 diff --git a/experiments/mpsc/mpsc_experiment.py b/experiments/mpsc/mpsc_experiment.py index cdab535d2..16c7a408c 100644 --- a/experiments/mpsc/mpsc_experiment.py +++ b/experiments/mpsc/mpsc_experiment.py @@ -47,7 +47,7 @@ def run(plot=False, training=False, model='ppo'): if config.algo in ['ppo', 'sac']: # Load state_dict from trained. - ctrl.load(f'./models/rl_models/{model}/model_latest.pt') + ctrl.load(f'./models/rl_models/{model}/model_best.pt') # Remove temporary files and directories shutil.rmtree('./temp', ignore_errors=True) @@ -74,7 +74,7 @@ def run(plot=False, training=False, model='ppo'): ) safety_filter.learn(env=train_env) safety_filter.save(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') - # raise SystemExit + raise SystemExit else: safety_filter.load(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl') @@ -134,7 +134,7 @@ def run_multiple_models(plot, all_models): for model in all_models: print(model) - for i in range(25): + for i in range(25 if not plot else 1): X_GOAL, uncert_results, _, cert_results, _ = run(plot=plot, training=False, model=model) if i == 0: all_uncert_results, all_cert_results = uncert_results, cert_results @@ -161,5 +161,5 @@ def run_multiple_models(plot, all_models): if __name__ == '__main__': - # run(plot=True, training=False, model='ppo') - run_multiple_models(plot=False, all_models=['none', 'mpsf']) + # run(plot=True, training=False, model='none') + run_multiple_models(plot=True, all_models=['none2', 'mpsf2']) diff --git a/experiments/mpsc/mpsc_experiment.sh b/experiments/mpsc/mpsc_experiment.sh index 678584020..526c1cfd5 100755 --- a/experiments/mpsc/mpsc_experiment.sh +++ b/experiments/mpsc/mpsc_experiment.sh @@ -4,10 +4,10 @@ SYS='quadrotor_2D_attitude' TASK='tracking' ALGO='ppo' -SAFETY_FILTER='nl_mpsc' -# SAFETY_FILTER='mpsc_acados' -# MPSC_COST='precomputed_cost' -MPSC_COST='one_step_cost' +# SAFETY_FILTER='nl_mpsc' +SAFETY_FILTER='mpsc_acados' +MPSC_COST='precomputed_cost' +# MPSC_COST='one_step_cost' MPSC_COST_HORIZON=25 DECAY_FACTOR=0.9 diff --git a/experiments/mpsc/train_model.sh b/experiments/mpsc/train_model.sh index 9febae2ef..1d3f09109 100755 --- a/experiments/mpsc/train_model.sh +++ b/experiments/mpsc/train_model.sh @@ -10,9 +10,9 @@ FILTER=False SF_PEN=1 if [ "$FILTER" == 'True' ]; then - TAG=mpsf + TAG=mpsf2 else - TAG=none + TAG=none2 fi # Train the unsafe controller/agent. diff --git a/safe_control_gym/safety_filters/__init__.py b/safe_control_gym/safety_filters/__init__.py index a283ed008..b82a4b201 100644 --- a/safe_control_gym/safety_filters/__init__.py +++ b/safe_control_gym/safety_filters/__init__.py @@ -10,6 +10,10 @@ entry_point='safe_control_gym.safety_filters.mpsc.nl_mpsc:NL_MPSC', config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml') +register(idx='mpsc_acados', + entry_point='safe_control_gym.safety_filters.mpsc.mpsc_acados:MPSC_ACADOS', + config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml') + register(idx='cbf', entry_point='safe_control_gym.safety_filters.cbf.cbf:CBF', config_entry_point='safe_control_gym.safety_filters.cbf:cbf.yaml') diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_acados.py b/safe_control_gym/safety_filters/mpsc/mpsc_acados.py new file mode 100644 index 000000000..1846f85ca --- /dev/null +++ b/safe_control_gym/safety_filters/mpsc/mpsc_acados.py @@ -0,0 +1,259 @@ +'''Model Predictive Safety Certification using Acados.''' +import os +import shutil +from datetime import datetime + +import casadi as cs +import numpy as np +import scipy +from acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver + +from safe_control_gym.controllers.mpc.mpc_utils import set_acados_constraint_bound +from safe_control_gym.safety_filters.mpsc.mpsc import MPSC +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function +from safe_control_gym.utils.utils import timing + + +class MPSC_ACADOS(MPSC): + '''MPSC with full nonlinear model.''' + + def __init__( + self, + env_func, + horizon: int = 5, + q_mpc: list = [1], + r_mpc: list = [1], + integration_algo: str = 'rk4', + warmstart: bool = True, + additional_constraints: list = None, + use_terminal_set: bool = True, + soften_constraints: bool = False, + slack_cost: float = 1, + constraint_tol: float = 1e-6, + seed: int = 0, + use_RTI: bool = False, + cost_function: Cost_Function = Cost_Function.ONE_STEP_COST, + mpsc_cost_horizon: int = 5, + decay_factor: float = 0.85, + **kwargs + ): + '''Creates task and controller. + + Args: + env_func (Callable): function to instantiate task/environment. + horizon (int): mpc planning horizon. + q_mpc (list): diagonals of state cost weight. + r_mpc (list): diagonals of input/action cost weight. + warmstart (bool): if to initialize from previous iteration. + soften_constraints (bool): Formulate the constraints as soft constraints. + constraint_tol (float): Tolerance to add the the constraint as sometimes solvers are not exact. + seed (int): random seed. + use_RTI (bool): Real-time iteration for acados. + ''' + for k, v in locals().items(): + if k != 'self' and k != 'kwargs' and '__' not in k: + self.__dict__.update({k: v}) + + super().__init__( + env_func, + horizon, + q_mpc, + r_mpc, + integration_algo, + warmstart, + additional_constraints, + use_terminal_set, + cost_function, + mpsc_cost_horizon, + decay_factor, + **kwargs) + + # acados settings + self.use_RTI = use_RTI + + # Dynamics model. + self.setup_acados_model() + # Acados optimizer. + self.setup_acados_optimizer() + + @timing + def reset(self): + '''Prepares for training or evaluation.''' + print('Resetting MPC') + super().reset() + if hasattr(self, 'acados_model'): + del self.acados_model + if hasattr(self, 'ocp'): + del self.ocp + if hasattr(self, 'acados_ocp_solver'): + del self.acados_ocp_solver + + # delete the generated c code directory + if os.path.exists('./c_generated_code'): + print('deleting the generated MPC c code directory') + shutil.rmtree('./c_generated_code') + assert not os.path.exists('./c_generated_code'), 'Failed to delete the generated c code directory' + + def setup_acados_model(self) -> AcadosModel: + '''Sets up symbolic model for acados.''' + acados_model = AcadosModel() + acados_model.x = self.model.x_sym + acados_model.u = self.model.u_sym + acados_model.name = self.env.NAME + + # set up rk4 (acados need symbolic expression of dynamics, not function) + k1 = self.model.fc_func(acados_model.x, acados_model.u) + k2 = self.model.fc_func(acados_model.x + self.dt / 2 * k1, acados_model.u) + k3 = self.model.fc_func(acados_model.x + self.dt / 2 * k2, acados_model.u) + k4 = self.model.fc_func(acados_model.x + self.dt * k3, acados_model.u) + f_disc = acados_model.x + self.dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4) + + acados_model.disc_dyn_expr = f_disc + + # store meta information # NOTE: unit is missing + acados_model.x_labels = self.env.STATE_LABELS + acados_model.u_labels = self.env.ACTION_LABELS + acados_model.t_label = 'time' + # get current time stamp in $ymd_HMS format + current_time = datetime.now().strftime('%Y%m%d_%H%M%S') + acados_model.name = self.env.NAME + '_' + current_time + + self.acados_model = acados_model + + def set_dynamics(self): + pass + + def setup_casadi_optimizer(self): + pass + + def setup_acados_optimizer(self): + '''Sets up nonlinear optimization problem.''' + nx, nu = self.model.nx, self.model.nu + ny = nx + nu + ny_e = nx + + # create ocp object to formulate the OCP + ocp = AcadosOcp() + ocp.model = self.acados_model + + # set dimensions + ocp.dims.N = self.horizon # prediction horizon + + # set cost (NOTE: safe-control-gym uses quadratic cost) + ocp.cost.cost_type = 'LINEAR_LS' + ocp.cost.cost_type_e = 'LINEAR_LS' + + Q_mat = np.zeros((nx, nx)) + R_mat = np.eye(nu) + ocp.cost.W_e = np.zeros((nx, nx)) + ocp.cost.W = scipy.linalg.block_diag(Q_mat, R_mat) + + ocp.cost.Vx = np.zeros((ny, nx)) + ocp.cost.Vu = np.zeros((ny, nu)) + ocp.cost.Vu[nx:nx + nu, :] = np.eye(nu) + ocp.cost.Vx_e = np.eye(nx) + + # placeholder y_ref and y_ref_e (will be set in select_action) + ocp.cost.yref = np.zeros((ny, )) + ocp.cost.yref_e = np.zeros((ny_e, )) + + # set up solver options + ocp.solver_options.qp_solver = 'PARTIAL_CONDENSING_HPIPM' + ocp.solver_options.hessian_approx = 'GAUSS_NEWTON' + ocp.solver_options.integrator_type = 'DISCRETE' + ocp.solver_options.nlp_solver_type = 'SQP' if not self.use_RTI else 'SQP_RTI' + ocp.solver_options.nlp_solver_max_iter = 25 if not self.use_RTI else 1 + ocp.solver_options.tf = self.horizon * self.dt # prediction horizon + + ocp.constraints.x0 = self.model.X_EQ + + # Constraints + # general constraint expressions + state_constraint_expr_list = [] + input_constraint_expr_list = [] + for state_constraint in self.state_constraints_sym: + state_constraint_expr_list.append(state_constraint(ocp.model.x)) + for input_constraint in self.input_constraints_sym: + input_constraint_expr_list.append(input_constraint(ocp.model.u)) + + h_expr_list = state_constraint_expr_list + input_constraint_expr_list + h_expr = cs.vertcat(*h_expr_list) + h0_expr = cs.vertcat(*h_expr_list) + he_expr = cs.vertcat(*state_constraint_expr_list) # terminal constraints are only state constraints + # pass the constraints to the ocp object + ocp = self.processing_acados_constraints_expression(ocp, h0_expr, h_expr, he_expr) + + # slack costs for nonlinear constraints + if self.soften_constraints: + # slack variables for all constraints + ocp.constraints.Jsh = np.eye(2 * ny) + # slack penalty + ocp.cost.Zu = self.slack_cost * np.ones(2 * ny) + ocp.cost.Zl = self.slack_cost * np.ones(2 * ny) + ocp.cost.zl = self.slack_cost * np.ones(2 * ny) + ocp.cost.zu = self.slack_cost * np.ones(2 * ny) + + solver_json = 'acados_ocp_mpsf.json' + ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json, generate=True, build=True) + + for stage in range(self.mpsc_cost_horizon): + ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W) + + for stage in range(self.mpsc_cost_horizon, self.horizon): + ocp_solver.cost_set(stage, 'W', 0 * ocp.cost.W) + + self.ocp_solver = ocp_solver + self.ocp = ocp + + def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_expr, he_expr) -> AcadosOcp: + '''Preprocess the constraints to be compatible with acados. + Args: + ocp (AcadosOcp): acados ocp object + h0_expr (casadi expression): initial state constraints + h_expr (casadi expression): state and input constraints + he_expr (casadi expression): terminal state constraints + Returns: + ocp (AcadosOcp): acados ocp object with constraints set. + + An alternative way to set the constraints is to use bounded constraints of acados: + # bounded input constraints + idxbu = np.where(np.sum(self.env.constraints.input_constraints[0].constraint_filter, axis=0) != 0)[0] + ocp.constraints.Jbu = np.eye(nu) + ocp.constraints.lbu = self.env.constraints.input_constraints[0].lower_bounds + ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds + ocp.constraints.idxbu = idxbu # active constraints dimension + ''' + + ub = {'h': set_acados_constraint_bound(h_expr, 'ub', self.constraint_tol), + 'h0': set_acados_constraint_bound(h0_expr, 'ub', self.constraint_tol), + 'he': set_acados_constraint_bound(he_expr, 'ub', self.constraint_tol), } + + lb = {'h': set_acados_constraint_bound(h_expr, 'lb'), + 'h0': set_acados_constraint_bound(h0_expr, 'lb'), + 'he': set_acados_constraint_bound(he_expr, 'lb'), } + + # make sure all the ub and lb are 1D numpy arrays + # (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche) + for key in ub.keys(): + ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key] + lb[key] = lb[key].flatten() if lb[key].ndim != 1 else lb[key] + # check ub and lb dimensions + for key in ub.keys(): + assert ub[key].ndim == 1, f'ub[{key}] is not 1D numpy array' + assert lb[key].ndim == 1, f'lb[{key}] is not 1D numpy array' + assert ub['h'].shape == lb['h'].shape, 'h_ub and h_lb have different shapes' + + # pass the constraints to the ocp object + ocp.model.con_h_expr_0, ocp.model.con_h_expr, ocp.model.con_h_expr_e = \ + h0_expr, h_expr, he_expr + ocp.dims.nh_0, ocp.dims.nh, ocp.dims.nh_e = \ + h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0] + # assign constraints upper and lower bounds + ocp.constraints.uh_0 = ub['h0'] + ocp.constraints.lh_0 = lb['h0'] + ocp.constraints.uh = ub['h'] + ocp.constraints.lh = lb['h'] + ocp.constraints.uh_e = ub['he'] + ocp.constraints.lh_e = lb['he'] + + return ocp diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_utils.py b/safe_control_gym/safety_filters/mpsc/mpsc_utils.py index 0a66435a4..0771e0f75 100644 --- a/safe_control_gym/safety_filters/mpsc/mpsc_utils.py +++ b/safe_control_gym/safety_filters/mpsc/mpsc_utils.py @@ -157,45 +157,3 @@ def get_discrete_derivative(signal, ctrl_freq): ''' discrete_derivative = (signal[1:, :] - signal[:-1, :]) * ctrl_freq return discrete_derivative - - -def box2polytopic(constraint): - '''Convert constraints into an explicit polytopic form. This assumes that constraints contain the origin. - - Args: - constraint (Constraint): The constraint to be converted. - - Returns: - L (ndarray): The polytopic matrix. - l (ndarray): Whether the constraint is active. - ''' - - Limit = [] - limit_active = [] - - Z_mid = (constraint.upper_bounds + constraint.lower_bounds) / 2.0 - Z_limits = np.array([[constraint.upper_bounds[i] - Z_mid[i], constraint.lower_bounds[i] - Z_mid[i]] for i in range(constraint.upper_bounds.shape[0])]) - - dim = Z_limits.shape[0] - eye_dim = np.eye(dim) - - for constraint_id in range(0, dim): - if Z_limits[constraint_id, 0] != -float('inf'): - if Z_limits[constraint_id, 0] == 0: - limit_active += [0] - Limit += [-eye_dim[constraint_id, :]] - else: - limit_active += [1] - factor = 1 / Z_limits[constraint_id, 0] - Limit += [factor * eye_dim[constraint_id, :]] - - if Z_limits[constraint_id, 1] != float('inf'): - if Z_limits[constraint_id, 1] == 0: - limit_active += [0] - Limit += [eye_dim[constraint_id, :]] - else: - limit_active += [1] - factor = 1 / Z_limits[constraint_id, 1] - Limit += [factor * eye_dim[constraint_id, :]] - - return Z_mid, np.array(Limit), np.array(limit_active) diff --git a/safe_control_gym/safety_filters/mpsc/nl_mpsc.py b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py index fc1d2781c..44ff11655 100644 --- a/safe_control_gym/safety_filters/mpsc/nl_mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py @@ -25,7 +25,7 @@ from safe_control_gym.envs.benchmark_env import Environment, Task from safe_control_gym.safety_filters.cbf.cbf_utils import cartesian_product from safe_control_gym.safety_filters.mpsc.mpsc import MPSC -from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function, box2polytopic +from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function class NL_MPSC(MPSC): @@ -78,8 +78,8 @@ def __init__(self, self.state_constraint = self.constraints.state_constraints[0] self.input_constraint = self.constraints.input_constraints[0] - [self.X_mid, L_x, l_x] = box2polytopic(self.state_constraint) - [self.U_mid, L_u, l_u] = box2polytopic(self.input_constraint) + [self.X_mid, L_x, l_x] = self.box2polytopic(self.state_constraint) + [self.U_mid, L_u, l_u] = self.box2polytopic(self.input_constraint) # number of constraints p_x = l_x.shape[0] @@ -367,6 +367,47 @@ def get_terminal_ingredients(self): if self.integration_algo == 'LTI' and self.use_terminal_set: self.get_terminal_constraint() + def box2polytopic(self, constraint): + '''Convert constraints into an explicit polytopic form. This assumes that constraints contain the origin. + + Args: + constraint (Constraint): The constraint to be converted. + + Returns: + L (ndarray): The polytopic matrix. + l (ndarray): Whether the constraint is active. + ''' + + Limit = [] + limit_active = [] + + Z_mid = (constraint.upper_bounds + constraint.lower_bounds) / 2.0 + Z_limits = np.array([[constraint.upper_bounds[i] - Z_mid[i], constraint.lower_bounds[i] - Z_mid[i]] for i in range(constraint.upper_bounds.shape[0])]) + + dim = Z_limits.shape[0] + eye_dim = np.eye(dim) + + for constraint_id in range(0, dim): + if Z_limits[constraint_id, 0] != -float('inf'): + if Z_limits[constraint_id, 0] == 0: + limit_active += [0] + Limit += [-eye_dim[constraint_id, :]] + else: + limit_active += [1] + factor = 1 / Z_limits[constraint_id, 0] + Limit += [factor * eye_dim[constraint_id, :]] + + if Z_limits[constraint_id, 1] != float('inf'): + if Z_limits[constraint_id, 1] == 0: + limit_active += [0] + Limit += [eye_dim[constraint_id, :]] + else: + limit_active += [1] + factor = 1 / Z_limits[constraint_id, 1] + Limit += [factor * eye_dim[constraint_id, :]] + + return Z_mid, np.array(Limit), np.array(limit_active) + def setup_tube_optimization(self, lamb): '''Sets up the optimization to find the lyapunov function.