Skip to content

Commit

Permalink
MPSC_ACADOS works
Browse files Browse the repository at this point in the history
  • Loading branch information
Federico-PizarroBejarano committed Nov 4, 2024
1 parent 6294b73 commit 1fac28d
Show file tree
Hide file tree
Showing 8 changed files with 350 additions and 56 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
safety_filter: nl_mpsc
sf_config:
# LQR controller parameters
q_mpc: [18, 0.1, 18, 0.5, 0.5, 0.0001]
r_mpc: [3., 3.]

prior_info:
prior_prop:
beta_1: 18.11298
beta_2: 3.6800
beta_3: 0
alpha_1: -140.8
alpha_2: -13.4
alpha_3: 124.8
randomize_prior_prop: False
prior_prop_rand_info: null

# MPC Parameters
use_acados: True
horizon: 25
warmstart: True
integration_algo: rk4
use_terminal_set: False

# Cost function
cost_function: one_step_cost
mpsc_cost_horizon: 5
decay_factor: 0.85

# Softening
soften_constraints: True
slack_cost: 250.0
10 changes: 5 additions & 5 deletions experiments/mpsc/mpsc_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def run(plot=False, training=False, model='ppo'):

if config.algo in ['ppo', 'sac']:
# Load state_dict from trained.
ctrl.load(f'./models/rl_models/{model}/model_latest.pt')
ctrl.load(f'./models/rl_models/{model}/model_best.pt')

# Remove temporary files and directories
shutil.rmtree('./temp', ignore_errors=True)
Expand All @@ -74,7 +74,7 @@ def run(plot=False, training=False, model='ppo'):
)
safety_filter.learn(env=train_env)
safety_filter.save(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl')
# raise SystemExit
raise SystemExit
else:
safety_filter.load(path=f'./models/mpsc_parameters/{config.safety_filter}_{system}.pkl')

Expand Down Expand Up @@ -134,7 +134,7 @@ def run_multiple_models(plot, all_models):

for model in all_models:
print(model)
for i in range(25):
for i in range(25 if not plot else 1):
X_GOAL, uncert_results, _, cert_results, _ = run(plot=plot, training=False, model=model)
if i == 0:
all_uncert_results, all_cert_results = uncert_results, cert_results
Expand All @@ -161,5 +161,5 @@ def run_multiple_models(plot, all_models):


if __name__ == '__main__':
# run(plot=True, training=False, model='ppo')
run_multiple_models(plot=False, all_models=['none', 'mpsf'])
# run(plot=True, training=False, model='none')
run_multiple_models(plot=True, all_models=['none2', 'mpsf2'])
8 changes: 4 additions & 4 deletions experiments/mpsc/mpsc_experiment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ SYS='quadrotor_2D_attitude'
TASK='tracking'
ALGO='ppo'

SAFETY_FILTER='nl_mpsc'
# SAFETY_FILTER='mpsc_acados'
# MPSC_COST='precomputed_cost'
MPSC_COST='one_step_cost'
# SAFETY_FILTER='nl_mpsc'
SAFETY_FILTER='mpsc_acados'
MPSC_COST='precomputed_cost'
# MPSC_COST='one_step_cost'
MPSC_COST_HORIZON=25
DECAY_FACTOR=0.9

Expand Down
4 changes: 2 additions & 2 deletions experiments/mpsc/train_model.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ FILTER=False
SF_PEN=1

if [ "$FILTER" == 'True' ]; then
TAG=mpsf
TAG=mpsf2
else
TAG=none
TAG=none2
fi

# Train the unsafe controller/agent.
Expand Down
4 changes: 4 additions & 0 deletions safe_control_gym/safety_filters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
entry_point='safe_control_gym.safety_filters.mpsc.nl_mpsc:NL_MPSC',
config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml')

register(idx='mpsc_acados',
entry_point='safe_control_gym.safety_filters.mpsc.mpsc_acados:MPSC_ACADOS',
config_entry_point='safe_control_gym.safety_filters.mpsc:mpsc.yaml')

register(idx='cbf',
entry_point='safe_control_gym.safety_filters.cbf.cbf:CBF',
config_entry_point='safe_control_gym.safety_filters.cbf:cbf.yaml')
Expand Down
259 changes: 259 additions & 0 deletions safe_control_gym/safety_filters/mpsc/mpsc_acados.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
'''Model Predictive Safety Certification using Acados.'''
import os
import shutil
from datetime import datetime

import casadi as cs
import numpy as np
import scipy
from acados_template import AcadosModel, AcadosOcp, AcadosOcpSolver

from safe_control_gym.controllers.mpc.mpc_utils import set_acados_constraint_bound
from safe_control_gym.safety_filters.mpsc.mpsc import MPSC
from safe_control_gym.safety_filters.mpsc.mpsc_utils import Cost_Function
from safe_control_gym.utils.utils import timing


class MPSC_ACADOS(MPSC):
'''MPSC with full nonlinear model.'''

def __init__(
self,
env_func,
horizon: int = 5,
q_mpc: list = [1],
r_mpc: list = [1],
integration_algo: str = 'rk4',
warmstart: bool = True,
additional_constraints: list = None,
use_terminal_set: bool = True,
soften_constraints: bool = False,
slack_cost: float = 1,
constraint_tol: float = 1e-6,
seed: int = 0,
use_RTI: bool = False,
cost_function: Cost_Function = Cost_Function.ONE_STEP_COST,
mpsc_cost_horizon: int = 5,
decay_factor: float = 0.85,
**kwargs
):
'''Creates task and controller.
Args:
env_func (Callable): function to instantiate task/environment.
horizon (int): mpc planning horizon.
q_mpc (list): diagonals of state cost weight.
r_mpc (list): diagonals of input/action cost weight.
warmstart (bool): if to initialize from previous iteration.
soften_constraints (bool): Formulate the constraints as soft constraints.
constraint_tol (float): Tolerance to add the the constraint as sometimes solvers are not exact.
seed (int): random seed.
use_RTI (bool): Real-time iteration for acados.
'''
for k, v in locals().items():
if k != 'self' and k != 'kwargs' and '__' not in k:
self.__dict__.update({k: v})

super().__init__(
env_func,
horizon,
q_mpc,
r_mpc,
integration_algo,
warmstart,
additional_constraints,
use_terminal_set,
cost_function,
mpsc_cost_horizon,
decay_factor,
**kwargs)

# acados settings
self.use_RTI = use_RTI

# Dynamics model.
self.setup_acados_model()
# Acados optimizer.
self.setup_acados_optimizer()

@timing
def reset(self):
'''Prepares for training or evaluation.'''
print('Resetting MPC')
super().reset()
if hasattr(self, 'acados_model'):
del self.acados_model
if hasattr(self, 'ocp'):
del self.ocp
if hasattr(self, 'acados_ocp_solver'):
del self.acados_ocp_solver

# delete the generated c code directory
if os.path.exists('./c_generated_code'):
print('deleting the generated MPC c code directory')
shutil.rmtree('./c_generated_code')
assert not os.path.exists('./c_generated_code'), 'Failed to delete the generated c code directory'

def setup_acados_model(self) -> AcadosModel:
'''Sets up symbolic model for acados.'''
acados_model = AcadosModel()
acados_model.x = self.model.x_sym
acados_model.u = self.model.u_sym
acados_model.name = self.env.NAME

# set up rk4 (acados need symbolic expression of dynamics, not function)
k1 = self.model.fc_func(acados_model.x, acados_model.u)
k2 = self.model.fc_func(acados_model.x + self.dt / 2 * k1, acados_model.u)
k3 = self.model.fc_func(acados_model.x + self.dt / 2 * k2, acados_model.u)
k4 = self.model.fc_func(acados_model.x + self.dt * k3, acados_model.u)
f_disc = acados_model.x + self.dt / 6 * (k1 + 2 * k2 + 2 * k3 + k4)

acados_model.disc_dyn_expr = f_disc

# store meta information # NOTE: unit is missing
acados_model.x_labels = self.env.STATE_LABELS
acados_model.u_labels = self.env.ACTION_LABELS
acados_model.t_label = 'time'
# get current time stamp in $ymd_HMS format
current_time = datetime.now().strftime('%Y%m%d_%H%M%S')
acados_model.name = self.env.NAME + '_' + current_time

self.acados_model = acados_model

def set_dynamics(self):
pass

def setup_casadi_optimizer(self):
pass

def setup_acados_optimizer(self):
'''Sets up nonlinear optimization problem.'''
nx, nu = self.model.nx, self.model.nu
ny = nx + nu
ny_e = nx

# create ocp object to formulate the OCP
ocp = AcadosOcp()
ocp.model = self.acados_model

# set dimensions
ocp.dims.N = self.horizon # prediction horizon

# set cost (NOTE: safe-control-gym uses quadratic cost)
ocp.cost.cost_type = 'LINEAR_LS'
ocp.cost.cost_type_e = 'LINEAR_LS'

Q_mat = np.zeros((nx, nx))
R_mat = np.eye(nu)
ocp.cost.W_e = np.zeros((nx, nx))
ocp.cost.W = scipy.linalg.block_diag(Q_mat, R_mat)

ocp.cost.Vx = np.zeros((ny, nx))
ocp.cost.Vu = np.zeros((ny, nu))
ocp.cost.Vu[nx:nx + nu, :] = np.eye(nu)
ocp.cost.Vx_e = np.eye(nx)

# placeholder y_ref and y_ref_e (will be set in select_action)
ocp.cost.yref = np.zeros((ny, ))
ocp.cost.yref_e = np.zeros((ny_e, ))

# set up solver options
ocp.solver_options.qp_solver = 'PARTIAL_CONDENSING_HPIPM'
ocp.solver_options.hessian_approx = 'GAUSS_NEWTON'
ocp.solver_options.integrator_type = 'DISCRETE'
ocp.solver_options.nlp_solver_type = 'SQP' if not self.use_RTI else 'SQP_RTI'
ocp.solver_options.nlp_solver_max_iter = 25 if not self.use_RTI else 1
ocp.solver_options.tf = self.horizon * self.dt # prediction horizon

ocp.constraints.x0 = self.model.X_EQ

# Constraints
# general constraint expressions
state_constraint_expr_list = []
input_constraint_expr_list = []
for state_constraint in self.state_constraints_sym:
state_constraint_expr_list.append(state_constraint(ocp.model.x))
for input_constraint in self.input_constraints_sym:
input_constraint_expr_list.append(input_constraint(ocp.model.u))

h_expr_list = state_constraint_expr_list + input_constraint_expr_list
h_expr = cs.vertcat(*h_expr_list)
h0_expr = cs.vertcat(*h_expr_list)
he_expr = cs.vertcat(*state_constraint_expr_list) # terminal constraints are only state constraints
# pass the constraints to the ocp object
ocp = self.processing_acados_constraints_expression(ocp, h0_expr, h_expr, he_expr)

# slack costs for nonlinear constraints
if self.soften_constraints:
# slack variables for all constraints
ocp.constraints.Jsh = np.eye(2 * ny)
# slack penalty
ocp.cost.Zu = self.slack_cost * np.ones(2 * ny)
ocp.cost.Zl = self.slack_cost * np.ones(2 * ny)
ocp.cost.zl = self.slack_cost * np.ones(2 * ny)
ocp.cost.zu = self.slack_cost * np.ones(2 * ny)

solver_json = 'acados_ocp_mpsf.json'
ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json, generate=True, build=True)

for stage in range(self.mpsc_cost_horizon):
ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W)

for stage in range(self.mpsc_cost_horizon, self.horizon):
ocp_solver.cost_set(stage, 'W', 0 * ocp.cost.W)

self.ocp_solver = ocp_solver
self.ocp = ocp

def processing_acados_constraints_expression(self, ocp: AcadosOcp, h0_expr, h_expr, he_expr) -> AcadosOcp:
'''Preprocess the constraints to be compatible with acados.
Args:
ocp (AcadosOcp): acados ocp object
h0_expr (casadi expression): initial state constraints
h_expr (casadi expression): state and input constraints
he_expr (casadi expression): terminal state constraints
Returns:
ocp (AcadosOcp): acados ocp object with constraints set.
An alternative way to set the constraints is to use bounded constraints of acados:
# bounded input constraints
idxbu = np.where(np.sum(self.env.constraints.input_constraints[0].constraint_filter, axis=0) != 0)[0]
ocp.constraints.Jbu = np.eye(nu)
ocp.constraints.lbu = self.env.constraints.input_constraints[0].lower_bounds
ocp.constraints.ubu = self.env.constraints.input_constraints[0].upper_bounds
ocp.constraints.idxbu = idxbu # active constraints dimension
'''

ub = {'h': set_acados_constraint_bound(h_expr, 'ub', self.constraint_tol),
'h0': set_acados_constraint_bound(h0_expr, 'ub', self.constraint_tol),
'he': set_acados_constraint_bound(he_expr, 'ub', self.constraint_tol), }

lb = {'h': set_acados_constraint_bound(h_expr, 'lb'),
'h0': set_acados_constraint_bound(h0_expr, 'lb'),
'he': set_acados_constraint_bound(he_expr, 'lb'), }

# make sure all the ub and lb are 1D numpy arrays
# (see: https://discourse.acados.org/t/infeasible-qps-when-using-nonlinear-casadi-constraint-expressions/1595/5?u=mxche)
for key in ub.keys():
ub[key] = ub[key].flatten() if ub[key].ndim != 1 else ub[key]
lb[key] = lb[key].flatten() if lb[key].ndim != 1 else lb[key]
# check ub and lb dimensions
for key in ub.keys():
assert ub[key].ndim == 1, f'ub[{key}] is not 1D numpy array'
assert lb[key].ndim == 1, f'lb[{key}] is not 1D numpy array'
assert ub['h'].shape == lb['h'].shape, 'h_ub and h_lb have different shapes'

# pass the constraints to the ocp object
ocp.model.con_h_expr_0, ocp.model.con_h_expr, ocp.model.con_h_expr_e = \
h0_expr, h_expr, he_expr
ocp.dims.nh_0, ocp.dims.nh, ocp.dims.nh_e = \
h0_expr.shape[0], h_expr.shape[0], he_expr.shape[0]
# assign constraints upper and lower bounds
ocp.constraints.uh_0 = ub['h0']
ocp.constraints.lh_0 = lb['h0']
ocp.constraints.uh = ub['h']
ocp.constraints.lh = lb['h']
ocp.constraints.uh_e = ub['he']
ocp.constraints.lh_e = lb['he']

return ocp
Loading

0 comments on commit 1fac28d

Please sign in to comment.