MPSC_ACADOS works

Federico-PizarroBejarano · Nov 4, 2024 · 1fac28d · 1fac28d
1 parent 6294b73
commit 1fac28d
Show file tree

Hide file tree

Showing 8 changed files with 350 additions and 56 deletions.
diff --git a/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml b/experiments/mpsc/config_overrides/mpsc_acados_quadrotor_2D_attitude.yaml
@@ -0,0 +1,32 @@
+safety_filter: nl_mpsc
+sf_config:
+  # LQR controller parameters
+  q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001]
+  r_mpc: [3., 3.]
+
+  prior_info:
+    prior_prop:
+      beta_1: 18.11298
+      beta_2: 3.6800
+      beta_3: 0
+      alpha_1: -140.8
+      alpha_2: -13.4
+      alpha_3: 124.8
+    randomize_prior_prop: False
+    prior_prop_rand_info: null
+
+  # MPC Parameters
+  use_acados: True
+  horizon: 25
+  warmstart: True
+  integration_algo: rk4
+  use_terminal_set: False
+
+  # Cost function
+  cost_function: one_step_cost
+  mpsc_cost_horizon: 5
+  decay_factor: 0.85
+
+  # Softening
+  soften_constraints: True
+  slack_cost: 250.0
diff --git a/experiments/mpsc/mpsc_experiment.py b/experiments/mpsc/mpsc_experiment.py
@@ -47,7 +47,7 @@ def run(plot=False, training=False, model='ppo'):
 
     if config.algo in ['ppo', 'sac']:
         # Load state_dict from trained.
-        ctrl.load(f'./models/rl_models/{model}/model_latest.pt')
+        ctrl.load(f'./models/rl_models/{model}/model_best.pt')
 
         # Remove temporary files and directories
         shutil.rmtree('./temp', ignore_errors=True)
@@ -74,7 +74,7 @@ def run(plot=False, training=False, model='ppo'):
                              )
         safety_filter.learn(env=train_env)
         safety_filter.save(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl')
-        # raise SystemExit
+        raise SystemExit
     else:
         safety_filter.load(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl')
 
@@ -134,7 +134,7 @@ def run_multiple_models(plot, all_models):
 
     for model in all_models:
         print(model)
-        for i in range(25):
+        for i in range(25 if not plot else 1):
             X_GOAL, uncert_results, _, cert_results, _ = run(plot=plot, training=False, model=model)
             if i == 0:
                 all_uncert_results, all_cert_results = uncert_results, cert_results
@@ -161,5 +161,5 @@ def run_multiple_models(plot, all_models):
 
 
 if __name__ == '__main__':
-    # run(plot=True, training=False, model='ppo')
-    run_multiple_models(plot=False, all_models=['none', 'mpsf'])
+    # run(plot=True, training=False, model='none')
+    run_multiple_models(plot=True, all_models=['none2', 'mpsf2'])
diff --git a/experiments/mpsc/mpsc_experiment.sh b/experiments/mpsc/mpsc_experiment.sh
@@ -4,10 +4,10 @@ SYS='quadrotor_2D_attitude'
 TASK='tracking'
 ALGO='ppo'
 
-SAFETY_FILTER='nl_mpsc'
-# SAFETY_FILTER='mpsc_acados'
-# MPSC_COST='precomputed_cost'
-MPSC_COST='one_step_cost'
+# SAFETY_FILTER='nl_mpsc'
+SAFETY_FILTER='mpsc_acados'
+MPSC_COST='precomputed_cost'
+# MPSC_COST='one_step_cost'
 MPSC_COST_HORIZON=25
 DECAY_FACTOR=0.9
 

diff --git a/experiments/mpsc/train_model.sh b/experiments/mpsc/train_model.sh
@@ -10,9 +10,9 @@ FILTER=False
 SF_PEN=1
 
 if [ "$FILTER" == 'True' ]; then
-    TAG=mpsf
+    TAG=mpsf2
 else
-    TAG=none
+    TAG=none2
 fi
 
 # Train the unsafe controller/agent.

diff --git a/safe_control_gym/safety_filters/__init__.py b/safe_control_gym/safety_filters/__init__.py
@@ -10,6 +10,10 @@
          entry_point='safe_control_gym.safety_filters.mpsc.nl_mpsc:NL_MPSC',
          config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml')
 
+register(idx='mpsc_acados',
+         entry_point='safe_control_gym.safety_filters.mpsc.mpsc_acados:MPSC_ACADOS',
+         config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml')
+
 register(idx='cbf',
          entry_point='safe_control_gym.safety_filters.cbf.cbf:CBF',
          config_entry_point='safe_control_gym.safety_filters.cbf:cbf.yaml')

diff --git a/safe_control_gym/safety_filters/mpsc/mpsc_acados.py b/safe_control_gym/safety_filters/mpsc/mpsc_acados.py
@@ -0,0 +1,259 @@
+'''Model Predictive Safety Certification using Acados.'''
+import os
+import shutil
+from datetime import datetime
+
+import casadi as cs
+import numpy as np
+import scipy
+from acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver
+
+from safe_control_gym.controllers.mpc.mpc_utils import set_acados_constraint_bound
+from safe_control_gym.safety_filters.mpsc.mpsc import MPSC
+from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function
+from safe_control_gym.utils.utils import timing
+
+
+class MPSC_ACADOS(MPSC):
+    '''MPSC with full nonlinear model.'''
+
+    def __init__(
+            self,
+            env_func,
+            horizon: int = 5,
+            q_mpc: list = [1],
+            r_mpc: list = [1],
+            integration_algo: str = 'rk4',
+            warmstart: bool = True,
+            additional_constraints: list = None,
+            use_terminal_set: bool = True,
+            soften_constraints: bool = False,
+            slack_cost: float = 1,
+            constraint_tol: float = 1e-6,
+            seed: int = 0,
+            use_RTI: bool = False,
+            cost_function: Cost_Function = Cost_Function.ONE_STEP_COST,
+            mpsc_cost_horizon: int = 5,
+            decay_factor: float = 0.85,
+            **kwargs
+    ):
+        '''Creates task and controller.
+
+        Args:
+            env_func (Callable): function to instantiate task/environment.
+            horizon (int): mpc planning horizon.
+            q_mpc (list): diagonals of state cost weight.
+            r_mpc (list): diagonals of input/action cost weight.
+            warmstart (bool): if to initialize from previous iteration.
+            soften_constraints (bool): Formulate the constraints as soft constraints.
+            constraint_tol (float): Tolerance to add the the constraint as sometimes solvers are not exact.
+            seed (int): random seed.
+            use_RTI (bool): Real-time iteration for acados.
+        '''
+        for k, v in locals().items():
+            if k != 'self' and k != 'kwargs' and '__' not in k:
+                self.__dict__.update({k: v})
+
+        super().__init__(
+            env_func,
+            horizon,
+            q_mpc,
+            r_mpc,
+            integration_algo,
+            warmstart,
+            additional_constraints,
+            use_terminal_set,
+            cost_function,
+            mpsc_cost_horizon,
+            decay_factor,
+            **kwargs)
+
+        # acados settings
+        self.use_RTI = use_RTI
+
+        # Dynamics model.
+        self.setup_acados_model()
+        # Acados optimizer.
+        self.setup_acados_optimizer()
+
+    @timing
+    def reset(self):
+        '''Prepares for training or evaluation.'''
+        print('Resetting MPC')
+        super().reset()
+        if hasattr(self, 'acados_model'):
+            del self.acados_model
+        if hasattr(self, 'ocp'):
+            del self.ocp
+        if hasattr(self, 'acados_ocp_solver'):
+            del self.acados_ocp_solver
+
+        # delete the generated c code directory
+        if os.path.exists('./c_generated_code'):
+            print('deleting the generated MPC c code directory')
+            shutil.rmtree('./c_generated_code')
+            assert not os.path.exists('./c_generated_code'), 'Failed to delete the generated c code directory'
+
+    def setup_acados_model(self) -> AcadosModel:
+        '''Sets up symbolic model for acados.'''
+        acados_model = AcadosModel()
+        acados_model.x = self.model.x_sym
+        acados_model.u = self.model.u_sym
+        acados_model.name = self.env.NAME
+
+        # set up rk4 (acados need symbolic expression of dynamics, not function)
+        k1 = self.model.fc_func(acados_model.x, acados_model.u)
+        k2 = self.model.fc_func(acados_model.x + self.dt / 2 * k1, acados_model.u)
+        k3 = self.model.fc_func(acados_model.x + self.dt / 2 * k2, acados_model.u)
+        k4 = self.model.fc_func(acados_model.x + self.dt * k3, acados_model.u)
+        f_disc = acados_model.x + self.dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4)
+
+        acados_model.disc_dyn_expr = f_disc
+
+        # store meta information # NOTE: unit is missing
+        acados_model.x_labels = self.env.STATE_LABELS
+        acados_model.u_labels = self.env.ACTION_LABELS
+        acados_model.t_label = 'time'
+        # get current time stamp in $ymd_HMS format
+        current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
+        acados_model.name = self.env.NAME + '_' + current_time
+
+        self.acados_model = acados_model
+
+    def set_dynamics(self):
+        pass
+
+    def setup_casadi_optimizer(self):
+        pass
+
+    def setup_acados_optimizer(self):
+        '''Sets up nonlinear optimization problem.'''
+        nx, nu = self.model.nx, self.model.nu
+        ny = nx + nu
+        ny_e = nx
+
+        # create ocp object to formulate the OCP
+        ocp = AcadosOcp()
+        ocp.model = self.acados_model
+
+        # set dimensions
+        ocp.dims.N = self.horizon  # prediction horizon
+
+        # set cost (NOTE: safe-control-gym uses quadratic cost)
+        ocp.cost.cost_type = 'LINEAR_LS'
+        ocp.cost.cost_type_e = 'LINEAR_LS'
+
+        Q_mat = np.zeros((nx, nx))
+        R_mat = np.eye(nu)
+        ocp.cost.W_e = np.zeros((nx, nx))
+        ocp.cost.W = scipy.linalg.block_diag(Q_mat, R_mat)
+
+        ocp.cost.Vx = np.zeros((ny, nx))
+        ocp.cost.Vu = np.zeros((ny, nu))
+        ocp.cost.Vu[nx:nx + nu, :] = np.eye(nu)
+        ocp.cost.Vx_e = np.eye(nx)
+
+        # placeholder y_ref and y_ref_e (will be set in select_action)
+        ocp.cost.yref = np.zeros((ny, ))
+        ocp.cost.yref_e = np.zeros((ny_e, ))
+
+        # set up solver options
+        ocp.solver_options.qp_solver = 'PARTIAL_CONDENSING_HPIPM'
+        ocp.solver_options.hessian_approx = 'GAUSS_NEWTON'
+        ocp.solver_options.integrator_type = 'DISCRETE'
+        ocp.solver_options.nlp_solver_type = 'SQP' if not self.use_RTI else 'SQP_RTI'
+        ocp.solver_options.nlp_solver_max_iter = 25 if not self.use_RTI else 1
+        ocp.solver_options.tf = self.horizon * self.dt  # prediction horizon
+
+        ocp.constraints.x0 = self.model.X_EQ
+
+        # Constraints
+        # general constraint expressions
+        state_constraint_expr_list = []
+        input_constraint_expr_list = []
+        for state_constraint in self.state_constraints_sym:
+            state_constraint_expr_list.append(state_constraint(ocp.model.x))
+        for input_constraint in self.input_constraints_sym:
+            input_constraint_expr_list.append(input_constraint(ocp.model.u))
+
+        h_expr_list = state_constraint_expr_list + input_constraint_expr_list
+        h_expr = cs.vertcat(*h_expr_list)
+        h0_expr = cs.vertcat(*h_expr_list)
+        he_expr = cs.vertcat(*state_constraint_expr_list)  # terminal constraints are only state constraints
+        # pass the constraints to the ocp object
+        ocp = self.processing_acados_constraints_expression(ocp, h0_expr, h_expr, he_expr)
+
+        # slack costs for nonlinear constraints
+        if self.soften_constraints:
+            # slack variables for all constraints
+            ocp.constraints.Jsh = np.eye(2 * ny)
+            # slack penalty
+            ocp.cost.Zu = self.slack_cost * np.ones(2 * ny)
+            ocp.cost.Zl = self.slack_cost * np.ones(2 * ny)
+            ocp.cost.zl = self.slack_cost * np.ones(2 * ny)
+            ocp.cost.zu = self.slack_cost * np.ones(2 * ny)
+
+        solver_json = 'acados_ocp_mpsf.json'
+        ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json, generate=True, build=True)
+
+        for stage in range(self.mpsc_cost_horizon):
+            ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W)
+
+        for stage in range(self.mpsc_cost_horizon, self.horizon):
+            ocp_solver.cost_set(stage, 'W', 0 * ocp.cost.W)
+
+        self.ocp_solver = ocp_solver
+        self.ocp = ocp
+
+    def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_expr, he_expr) -> AcadosOcp:
+        '''Preprocess the constraints to be compatible with acados.
+            Args:
+                ocp (AcadosOcp): acados ocp object
+                h0_expr (casadi expression): initial state constraints
+                h_expr (casadi expression): state and input constraints
+                he_expr (casadi expression): terminal state constraints
+            Returns:
+                ocp (AcadosOcp): acados ocp object with constraints set.
+
+        An alternative way to set the constraints is to use bounded constraints of acados:
+        # bounded input constraints
+        idxbu = np.where(np.sum(self.env.constraints.input_constraints[0].constraint_filter, axis=0) != 0)[0]
+        ocp.constraints.Jbu = np.eye(nu)
+        ocp.constraints.lbu = self.env.constraints.input_constraints[0].lower_bounds
+        ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds
+        ocp.constraints.idxbu = idxbu # active constraints dimension
+        '''
+
+        ub = {'h': set_acados_constraint_bound(h_expr, 'ub', self.constraint_tol),
+              'h0': set_acados_constraint_bound(h0_expr, 'ub', self.constraint_tol),
+              'he': set_acados_constraint_bound(he_expr, 'ub', self.constraint_tol), }
+
+        lb = {'h': set_acados_constraint_bound(h_expr, 'lb'),
+              'h0': set_acados_constraint_bound(h0_expr, 'lb'),
+              'he': set_acados_constraint_bound(he_expr, 'lb'), }
+
+        # make sure all the ub and lb are 1D numpy arrays
+        # (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche)
+        for key in ub.keys():
+            ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key]
+            lb[key] = lb[key].flatten() if lb[key].ndim != 1 else lb[key]
+        # check ub and lb dimensions
+        for key in ub.keys():
+            assert ub[key].ndim == 1, f'ub[{key}] is not 1D numpy array'
+            assert lb[key].ndim == 1, f'lb[{key}] is not 1D numpy array'
+        assert ub['h'].shape == lb['h'].shape, 'h_ub and h_lb have different shapes'
+
+        # pass the constraints to the ocp object
+        ocp.model.con_h_expr_0, ocp.model.con_h_expr, ocp.model.con_h_expr_e = \
+            h0_expr, h_expr, he_expr
+        ocp.dims.nh_0, ocp.dims.nh, ocp.dims.nh_e = \
+            h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0]
+        # assign constraints upper and lower bounds
+        ocp.constraints.uh_0 = ub['h0']
+        ocp.constraints.lh_0 = lb['h0']
+        ocp.constraints.uh = ub['h']
+        ocp.constraints.lh = lb['h']
+        ocp.constraints.uh_e = ub['he']
+        ocp.constraints.lh_e = lb['he']
+
+        return ocp