From 21f621ab102c3a6c75824389ef7197a83f278a69 Mon Sep 17 00:00:00 2001 From: Federico-PizarroBejarano Date: Mon, 18 Nov 2024 14:13:54 -0500 Subject: [PATCH] Fixing decay factor decrease --- experiments/mpsc/mpsc_experiment.sh | 8 ++-- experiments/mpsc/plotting_results.py | 37 ++++++------------- experiments/mpsc/train_all_models.sh | 2 +- experiments/mpsc/train_model.sbatch | 9 +++-- safe_control_gym/controllers/ppo/ppo.py | 3 +- .../safety_filters/mpsc/nl_mpsc.py | 16 +++++--- 6 files changed, 33 insertions(+), 42 deletions(-) diff --git a/experiments/mpsc/mpsc_experiment.sh b/experiments/mpsc/mpsc_experiment.sh index 731c33da2..3ed45d0e1 100755 --- a/experiments/mpsc/mpsc_experiment.sh +++ b/experiments/mpsc/mpsc_experiment.sh @@ -1,11 +1,11 @@ #!/bin/bash -SYS='cartpole' +# SYS='cartpole' # SYS='quadrotor_2D' -# SYS='quadrotor_3D' +SYS='quadrotor_3D' -TASK='stab' -# TASK='track' +# TASK='stab' +TASK='track' # ALGO='lqr' # ALGO='pid' diff --git a/experiments/mpsc/plotting_results.py b/experiments/mpsc/plotting_results.py index ef247be99..f5f1f6d44 100644 --- a/experiments/mpsc/plotting_results.py +++ b/experiments/mpsc/plotting_results.py @@ -40,9 +40,8 @@ def load_all_models(system, task, algo): for model in ordered_models: all_results[model] = [] - for seed in os.listdir(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/'): - with open(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/{seed}', 'rb') as f: - all_results[model].append(pickle.load(f)) + with open(f'./results_mpsc/{model}.pkl', 'rb') as f: + all_results[model].append(pickle.load(f)) consolidate_multiple_seeds(all_results, model) return all_results @@ -525,7 +524,7 @@ def plot_model_comparisons(system, task, algo, data_extractor): medianprops = dict(linestyle='--', linewidth=2.5, color='black') bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False) - for patch, color in zip(bplot['boxes'], colors.values()): + for patch, color in zip(bplot['boxes'], colors): patch.set_facecolor(color) fig.tight_layout() @@ -552,8 +551,7 @@ def plot_step_time(system, task, algo): all_results = {} for model in ordered_models: all_results[model] = [] - for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'): - all_results[model].append(load_from_logs(f'./models/rl_models/{system}/{task}/{algo}/{model}/{seed}/logs/')) + all_results[model].append(load_from_logs(f'./models/rl_models/{model}/logs/')) fig = plt.figure(figsize=(16.0, 10.0)) ax = fig.add_subplot(111) @@ -575,7 +573,7 @@ def plot_step_time(system, task, algo): medianprops = dict(linestyle='--', linewidth=2.5, color='black') bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False) - for patch, color in zip(bplot['boxes'], colors.values()): + for patch, color in zip(bplot['boxes'], colors): patch.set_facecolor(color) fig.tight_layout() @@ -627,8 +625,7 @@ def plot_all_logs(system, task, algo): for model in ordered_models: all_results[model] = [] - for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'): - all_results[model].append(load_from_logs(f'./models/rl_models/{system}/{task}/{algo}/{model}/{seed}/logs/')) + all_results[model].append(load_from_logs(f'./models/rl_models/{model}/logs/')) for key in all_results[ordered_models[0]][0].keys(): if key == 'stat_eval/ep_return': @@ -647,13 +644,11 @@ def plot_log(key, all_results): fig = plt.figure(figsize=(16.0, 10.0)) ax = fig.add_subplot(111) - labels = ordered_models - - for model, label in zip(ordered_models, labels): + for index, model in enumerate(ordered_models): x = all_results[model][0][key][1] / 1000 all_data = np.array([values[key][3] for values in all_results[model]]) - ax.plot(x, np.mean(all_data, axis=0), label=label, color=colors[model]) - ax.fill_between(x, np.min(all_data, axis=0), np.max(all_data, axis=0), alpha=0.3, edgecolor=colors[model], facecolor=colors[model]) + ax.plot(x, np.mean(all_data, axis=0), label=model, color=colors[index]) + # ax.fill_between(x, np.min(all_data, axis=0), np.max(all_data, axis=0), alpha=0.3, edgecolor=colors[index], facecolor=colors[index]) ax.set_ylabel(key, weight='bold', fontsize=45, labelpad=10) ax.set_xlabel('Training Episodes') @@ -671,18 +666,8 @@ def plot_log(key, all_results): if __name__ == '__main__': - ordered_models = ['none', 'none_cpen_0.01', 'none_cpen_0.1', 'none_cpen_1', 'mpsf_sr_pen_0.1', 'mpsf_sr_pen_1', 'mpsf_sr_pen_10', 'mpsf_sr_pen_100'] - - colors = { - 'none': 'cornflowerblue', - 'none_cpen_0.01': 'plum', - 'none_cpen_0.1': 'mediumorchid', - 'none_cpen_1': 'darkorchid', - 'mpsf_sr_pen_0.1': 'lightgreen', - 'mpsf_sr_pen_1': 'limegreen', - 'mpsf_sr_pen_10': 'forestgreen', - 'mpsf_sr_pen_100': 'darkgreen', - } + ordered_models = [model for model in os.listdir('./models/rl_models/') if 'curriculum' in model] + colors = plt.cm.viridis(np.linspace(0, 1, len(ordered_models))) def extract_rate_of_change_of_inputs(results_data, certified=True): return extract_rate_of_change(results_data, certified, order=1, mode='input') diff --git a/experiments/mpsc/train_all_models.sh b/experiments/mpsc/train_all_models.sh index dc7152cab..ca7fb021f 100755 --- a/experiments/mpsc/train_all_models.sh +++ b/experiments/mpsc/train_all_models.sh @@ -1,5 +1,5 @@ #!/bin/bash -sbatch train_model.sbatch True 1 1 +sbatch train_model.sbatch False 1 1 for MPSC_COST_HORIZON in 2 5 10 20; do for DECAY_FACTOR in 0.25 0.5 0.75 1; do sbatch train_model.sbatch True $MPSC_COST_HORIZON $DECAY_FACTOR diff --git a/experiments/mpsc/train_model.sbatch b/experiments/mpsc/train_model.sbatch index 1006e5ce3..ca2bd81e4 100755 --- a/experiments/mpsc/train_model.sbatch +++ b/experiments/mpsc/train_model.sbatch @@ -41,9 +41,10 @@ python3 train_rl.py \ sf_config.decay_factor=${DECAY_FACTOR} \ sf_config.max_decay_factor=${DECAY_FACTOR} \ sf_config.soften_constraints=True \ - algo_config.filter_train_actions=${FILTER} \ - algo_config.use_safe_reset=${FILTER} \ - algo_config.penalize_sf_diff=${FILTER} \ - algo_config.sf_penalty=SF_PENALTY + algo_config.filter_train_actions=True \ + algo_config.use_safe_reset=True \ + algo_config.penalize_sf_diff=True \ + algo_config.sf_penalty=$SF_PENALTY \ + algo_config.decay_factor_curriculum=$1 ./mpsc_experiment.sh $TAG $SYS $TASK $ALGO diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py index 1aeb5e1b9..bf35c673c 100644 --- a/safe_control_gym/controllers/ppo/ppo.py +++ b/safe_control_gym/controllers/ppo/ppo.py @@ -162,7 +162,8 @@ def learn(self, ): '''Performs learning (pre-training, training, fine-tuning, etc).''' while self.total_steps < self.max_env_steps: - self.safety_filter.decay_factor = self.safety_filter.max_decay_factor * (self.total_steps / self.max_env_steps) + if self.decay_factor_curriculum: + self.safety_filter.set_decay_factor(self.safety_filter.max_decay_factor * (self.total_steps / self.max_env_steps)) results = self.train_step() # Checkpoint. if self.total_steps >= self.max_env_steps or (self.save_interval and self.total_steps % self.save_interval == 0): diff --git a/safe_control_gym/safety_filters/mpsc/nl_mpsc.py b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py index 2bcb0b960..557b4999a 100644 --- a/safe_control_gym/safety_filters/mpsc/nl_mpsc.py +++ b/safe_control_gym/safety_filters/mpsc/nl_mpsc.py @@ -1040,12 +1040,6 @@ def setup_acados_optimizer(self): solver_json = 'acados_ocp_mpsf.json' ocp_solver = AcadosOcpSolver(ocp, json_file=solver_json, generate=True, build=True) - for stage in range(self.mpsc_cost_horizon): - ocp_solver.cost_set(stage, 'W', (self.cost_function.decay_factor**stage) * ocp.cost.W) - - for stage in range(self.mpsc_cost_horizon, self.horizon): - ocp_solver.cost_set(stage, 'W', 0 * ocp.cost.W) - s_var = np.zeros((self.horizon + 1)) g = np.zeros((self.horizon, self.p)) @@ -1057,4 +1051,14 @@ def setup_acados_optimizer(self): g[i, :] += (self.L_x @ self.X_mid) + (self.L_u @ self.U_mid) ocp_solver.constraints_set(i, 'ug', g[i, :]) + self.ocp = ocp self.ocp_solver = ocp_solver + + self.set_decay_factor(self.cost_function.decay_factor) + + def set_decay_factor(self, new_decay_factor): + for stage in range(self.mpsc_cost_horizon): + self.ocp_solver.cost_set(stage, 'W', (new_decay_factor**stage) * self.ocp.cost.W) + + for stage in range(self.mpsc_cost_horizon, self.horizon): + self.ocp_solver.cost_set(stage, 'W', 0 * self.ocp.cost.W)