From 5d254c5084661d85e557b9fcbcdfac6a3d40e643 Mon Sep 17 00:00:00 2001 From: Federico-PizarroBejarano Date: Mon, 7 Oct 2024 15:12:06 -0400 Subject: [PATCH] Minor updates --- .gitignore | 2 + .../quadrotor_3D/cpo_quadrotor_3D.yaml | 14 +- experiments/mpsc/plotting_results.py | 135 +++++++++++++----- experiments/mpsc/train_all_models.sh | 22 ++- experiments/mpsc/train_model.sbatch | 4 +- safe_control_gym/controllers/ppo/ppo.py | 23 ++- safe_control_gym/envs/benchmark_env.py | 6 +- 7 files changed, 137 insertions(+), 69 deletions(-) diff --git a/.gitignore b/.gitignore index f02ab33c7..6a294d7ec 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ examples/pid/*data/ # experiments/mpsc/temp-data/ experiments/mpsc/unsafe_rl_temp_data/ +experiments/mpsc/models/rl_models/ +experiments/mpsc/results*/ # results/ z_docstring.py diff --git a/experiments/mpsc/config_overrides/quadrotor_3D/cpo_quadrotor_3D.yaml b/experiments/mpsc/config_overrides/quadrotor_3D/cpo_quadrotor_3D.yaml index 107d5f053..3a7b3a2ff 100644 --- a/experiments/mpsc/config_overrides/quadrotor_3D/cpo_quadrotor_3D.yaml +++ b/experiments/mpsc/config_overrides/quadrotor_3D/cpo_quadrotor_3D.yaml @@ -1,8 +1,8 @@ algo: cpo algo_config: # Model args - hidden1: 128 - hidden2: 128 + hidden1: 256 + hidden2: 256 # Optim args discount_factor: 0.98 @@ -16,15 +16,15 @@ algo_config: cost_d: 0.0 # Runner args - max_steps: 1000 - num_epochs: 4000 - value_epochs: 150 + max_steps: 2000 + num_epochs: 5000 + value_epochs: 300 eval_batch_size: 20 # Misc - log_interval: 40 + log_interval: 50 save_interval: 0 num_checkpoints: 0 - eval_interval: 40 + eval_interval: 50 eval_save_best: True tensorboard: False diff --git a/experiments/mpsc/plotting_results.py b/experiments/mpsc/plotting_results.py index 4ae507a4c..ef247be99 100644 --- a/experiments/mpsc/plotting_results.py +++ b/experiments/mpsc/plotting_results.py @@ -12,8 +12,7 @@ from safe_control_gym.safety_filters.mpsc.mpsc_utils import get_discrete_derivative, 
high_frequency_content from safe_control_gym.utils.plotting import load_from_logs -plot = False -save_figs = True +plot = True # Saves figure if False U_EQs = { 'cartpole': 0, @@ -26,11 +25,12 @@ def load_all_models(system, task, algo): - '''Loads the results of every MPSC cost function for a specific experiment with every algo. + '''Loads the results of every experiment. Args: - system (str): The system to be controlled. - task (str): The task to be completed (either 'stab' or 'track'). + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted. Returns: all_results (dict): A dictionary containing all the results. @@ -38,10 +38,10 @@ def load_all_models(system, task, algo): all_results = {} - for model in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/'): + for model in ordered_models: all_results[model] = [] - for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'): - with open(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/{seed}.pkl', 'rb') as f: + for seed in os.listdir(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/'): + with open(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/{seed}', 'rb') as f: all_results[model].append(pickle.load(f)) consolidate_multiple_seeds(all_results, model) @@ -497,9 +497,10 @@ def plot_model_comparisons(system, task, algo, data_extractor): '''Plots the constraint violations of every controller for a specific experiment. Args: - system (str): The system to be controlled. - task (str): The task to be completed (either 'stab' or 'track'). - mpsc_cost_horizon (str): The cost horizon used by the smooth MPSC cost functions. + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted. 
+ data_extractor (func): The function which extracts the desired data. ''' all_results = load_all_models(system, task, algo) @@ -507,11 +508,11 @@ def plot_model_comparisons(system, task, algo, data_extractor): fig = plt.figure(figsize=(16.0, 10.0)) ax = fig.add_subplot(111) - labels = sorted(os.listdir(f'./models/rl_models/{system}/{task}/{algo}/')) + labels = ordered_models data = [] - for model in labels: + for model in ordered_models: exp_data = all_results[model] data.append(data_extractor(exp_data)) @@ -522,24 +523,71 @@ def plot_model_comparisons(system, task, algo, data_extractor): ax.set_xticks(x, labels, weight='bold', fontsize=15, rotation=30, ha='right') medianprops = dict(linestyle='--', linewidth=2.5, color='black') - bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels)) - - colors = {'mpsf_sr_pen_1': 'lightgreen', 'mpsf_sr_pen_10': 'limegreen', 'mpsf_sr_pen_100': 'forestgreen', 'mpsf_sr_pen_1000': 'darkgreen', 'none': 'cornflowerblue', 'none_cpen': 'plum'} + bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False) for patch, color in zip(bplot['boxes'], colors.values()): patch.set_facecolor(color) fig.tight_layout() - if data_extractor != extract_reward_cert: - ax.set_ylim(ymin=0) ax.yaxis.grid(True) if plot is True: plt.show() - if save_figs: + else: image_suffix = data_extractor.__name__.replace('extract_', '') - fig.savefig(f'./results_mpsc/{system}/{task}/{algo}/graphs/{system}_{task}_{image_suffix}.png', dpi=300) + fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300) + plt.close() + + +def plot_step_time(system, task, algo): + '''Plots the training time per step of every model for a specific experiment. + + Args: + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted. 
+ ''' + + all_results = {} + for model in ordered_models: + all_results[model] = [] + for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'): + all_results[model].append(load_from_logs(f'./models/rl_models/{system}/{task}/{algo}/{model}/{seed}/logs/')) + + fig = plt.figure(figsize=(16.0, 10.0)) + ax = fig.add_subplot(111) + + labels = ordered_models + + data = [] + + for model in ordered_models: + datum = np.array([values['stat/step_time'][3] for values in all_results[model]]).flatten() + data.append(datum) + + ylabel = 'Training Time per Step [ms]' + ax.set_ylabel(ylabel, weight='bold', fontsize=45, labelpad=10) + + x = np.arange(1, len(labels) + 1) + ax.set_xticks(x, labels, weight='bold', fontsize=15, rotation=30, ha='right') + + medianprops = dict(linestyle='--', linewidth=2.5, color='black') + bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False) + + for patch, color in zip(bplot['boxes'], colors.values()): + patch.set_facecolor(color) + + fig.tight_layout() + + ax.set_ylim(ymin=0) + ax.yaxis.grid(True) + + if plot is True: + plt.show() + else: + image_suffix = 'step_time' + fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300) plt.close() @@ -571,43 +619,40 @@ def plot_all_logs(system, task, algo): '''Plots comparative plots of all the logs. Args: - system (str): The system to be controlled. - task (str): The task to be completed (either 'stab' or 'track'). - mpsc_cost_horizon (str): The cost horizon used by the smooth MPSC cost functions. + system (str): The system to be plotted. + task (str): The task to be plotted (either 'stab' or 'track'). + algo (str): The controller to be plotted. 
''' all_results = {} - for model in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/'): + for model in ordered_models: all_results[model] = [] for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'): all_results[model].append(load_from_logs(f'./models/rl_models/{system}/{task}/{algo}/{model}/{seed}/logs/')) - for key in all_results['none'][0].keys(): - plot_log(system, task, algo, key, all_results) + for key in all_results[ordered_models[0]][0].keys(): + if key == 'stat_eval/ep_return': + plot_log(key, all_results) + if key == 'stat/constraint_violation': + plot_log(key, all_results) -def plot_log(system, task, algo, key, all_results): +def plot_log(key, all_results): '''Plots a comparative plot of the log 'key'. Args: - system (str): The system to be controlled. - task (str): The task to be completed (either 'stab' or 'track'). - mpsc_cost_horizon (str): The cost horizon used by the smooth MPSC cost functions. key (str): The name of the log to be plotted. all_results (dict): A dictionary of all the logged results for all models. 
''' fig = plt.figure(figsize=(16.0, 10.0)) ax = fig.add_subplot(111) - labels = sorted(all_results.keys()) - labels = [label for label in labels if '_es' not in label] + labels = ordered_models - colors = {'mpsf_sr_pen_1': 'lightgreen', 'mpsf_sr_pen_10': 'limegreen', 'mpsf_sr_pen_100': 'forestgreen', 'mpsf_sr_pen_1000': 'darkgreen', 'none': 'cornflowerblue', 'none_cpen': 'plum'} - - for model in labels: + for model, label in zip(ordered_models, labels): x = all_results[model][0][key][1] / 1000 all_data = np.array([values[key][3] for values in all_results[model]]) - ax.plot(x, np.mean(all_data, axis=0), label=model, color=colors[model]) + ax.plot(x, np.mean(all_data, axis=0), label=label, color=colors[model]) ax.fill_between(x, np.min(all_data, axis=0), np.max(all_data, axis=0), alpha=0.3, edgecolor=colors[model], facecolor=colors[model]) ax.set_ylabel(key, weight='bold', fontsize=45, labelpad=10) @@ -619,14 +664,25 @@ def plot_log(system, task, algo, key, all_results): if plot is True: plt.show() - if save_figs: + else: image_suffix = key.replace('/', '__') - fig.savefig(f'./results_mpsc/{system}/{task}/{algo}/graphs/{system}_{task}_{image_suffix}.png', dpi=300) + fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300) plt.close() if __name__ == '__main__': - ordered_costs = ['one_step', 'regularized', 'precomputed'] + ordered_models = ['none', 'none_cpen_0.01', 'none_cpen_0.1', 'none_cpen_1', 'mpsf_sr_pen_0.1', 'mpsf_sr_pen_1', 'mpsf_sr_pen_10', 'mpsf_sr_pen_100'] + + colors = { + 'none': 'cornflowerblue', + 'none_cpen_0.01': 'plum', + 'none_cpen_0.1': 'mediumorchid', + 'none_cpen_1': 'darkorchid', + 'mpsf_sr_pen_0.1': 'lightgreen', + 'mpsf_sr_pen_1': 'limegreen', + 'mpsf_sr_pen_10': 'forestgreen', + 'mpsf_sr_pen_100': 'darkgreen', + } def extract_rate_of_change_of_inputs(results_data, certified=True): return extract_rate_of_change(results_data, certified, order=1, mode='input') @@ -682,6 +738,7 @@ def extract_length_uncert(results_data, certified=False): 
algo_name = sys.argv[3] plot_all_logs(system_name, task_name, algo_name) + plot_step_time(system_name, task_name, algo_name) plot_model_comparisons(system_name, task_name, algo_name, extract_magnitude_of_corrections) plot_model_comparisons(system_name, task_name, algo_name, extract_percent_magnitude_of_corrections) plot_model_comparisons(system_name, task_name, algo_name, extract_max_correction) diff --git a/experiments/mpsc/train_all_models.sh b/experiments/mpsc/train_all_models.sh index df89200d9..a3cdcb8fd 100755 --- a/experiments/mpsc/train_all_models.sh +++ b/experiments/mpsc/train_all_models.sh @@ -2,13 +2,21 @@ for SYS in quadrotor_3D; do for ALGO in ppo; do for TASK in track; do - for SEED in 42 62 821 99 4077; do # 1102 1014 14 960406 2031; do - sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 1 $SEED #mpsf_sr_pen_1 - sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 10 $SEED #mpsf_sr_pen_10 - sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 100 $SEED #mpsf_sr_pen_100 - sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 1000 $SEED #mpsf_sr_pen_1000 - sbatch train_model.sbatch none False False $SYS $TASK $ALGO False False $SEED #none - sbatch train_model.sbatch none False False $SYS $TASK $ALGO True False $SEED #none_cpen + for SEED in 42 62 821 99 4077; do + # MPSF Ablation + ./train_model.sbatch none False False $SYS $TASK $ALGO False False $SEED #none + ./train_model.sbatch none False True $SYS $TASK $ALGO False 1 $SEED #none_pen_1 + ./train_model.sbatch none True False $SYS $TASK $ALGO False False $SEED #none_sr + ./train_model.sbatch none True True $SYS $TASK $ALGO False 1 $SEED #none_sr_pen_1 + ./train_model.sbatch mpsf False False $SYS $TASK $ALGO False False $SEED #mpsf + ./train_model.sbatch mpsf False True $SYS $TASK $ALGO False 1 $SEED #mpsf_pen_1 + ./train_model.sbatch mpsf True False $SYS $TASK $ALGO False False $SEED #mpsf_sr + ./train_model.sbatch mpsf True True $SYS $TASK $ALGO False 1 
$SEED #mpsf_sr_pen_1 + + # Constr Pen + ./train_model.sbatch none False False $SYS $TASK $ALGO True 0.01 $SEED #none_cpen_0.01 + ./train_model.sbatch none False False $SYS $TASK $ALGO True 0.1 $SEED #none_cpen_0.1 + ./train_model.sbatch none False False $SYS $TASK $ALGO True 1 $SEED #none_cpen_1 done done done diff --git a/experiments/mpsc/train_model.sbatch b/experiments/mpsc/train_model.sbatch index 2cfb9e9ce..2f736f798 100755 --- a/experiments/mpsc/train_model.sbatch +++ b/experiments/mpsc/train_model.sbatch @@ -70,8 +70,10 @@ fi if [ "$8" = False ]; then SF_PEN_TAG='' + CONSTR_PEN_VAL=0 else SF_PEN_TAG="_$8" + CONSTR_PEN_VAL=$8 fi if [ -z "$9" ]; then @@ -103,6 +105,7 @@ python3 train_rl.py \ --kv_overrides \ task_config.init_state=None \ task_config.use_constraint_penalty=${CONSTR_PEN} \ + task_config.constraint_penalty=${CONSTR_PEN_VAL} \ sf_config.cost_function=${MPSC_COST} \ sf_config.mpsc_cost_horizon=${MPSC_COST_HORIZON} \ sf_config.decay_factor=${DECAY_FACTOR} \ @@ -116,4 +119,3 @@ python3 train_rl.py \ sf_config.seed=${SEED} \ ./mpsc_experiment.sh $TAG $SYS $TASK $ALGO $SEED -# python plotting_results.py $SYS $TASK $ALGO diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py index 6cece223e..90bd65cd6 100644 --- a/safe_control_gym/controllers/ppo/ppo.py +++ b/safe_control_gym/controllers/ppo/ppo.py @@ -232,6 +232,7 @@ def run(self, ep_returns, ep_lengths = [], [] frames = [] total_return = 0 + start = time.time() while len(ep_returns) < n_episodes: action = self.select_action(obs=obs, info=info) @@ -244,9 +245,6 @@ def run(self, action = env.normalize_action(certified_action) else: self.safety_filter.ocp_solver.reset() - certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) - if success: - action = self.env.envs[0].normalize_action(certified_action) action = np.atleast_2d(np.squeeze([action])) obs, rew, done, info = env.step(action) @@ -268,7 +266,11 @@ def run(self, 
# Collect evaluation results. ep_lengths = np.asarray(ep_lengths) ep_returns = np.asarray(ep_returns) - eval_results = {'ep_returns': ep_returns, 'ep_lengths': ep_lengths} + eval_results = { + 'ep_returns': ep_returns, + 'ep_lengths': ep_lengths, + 'elapsed_time': time.time() - start + } if len(frames) > 0: eval_results['frames'] = frames # Other episodic stats from evaluation env. @@ -301,9 +303,6 @@ def train_step(self): action = self.env.envs[0].normalize_action(certified_action) else: self.safety_filter.ocp_solver.reset() - certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info) - if success and self.filter_train_actions is True: - action = self.env.envs[0].normalize_action(certified_action) action = np.atleast_2d(np.squeeze([action])) next_obs, rew, done, info = self.env.step(action) @@ -363,8 +362,7 @@ def log_step(self, self.logger.add_scalars( { 'step': step, - 'step_time': results['elapsed_time'], - 'progress': step / self.max_env_steps + 'progress': step / self.max_env_steps, }, step, prefix='time', @@ -387,7 +385,8 @@ def log_step(self, 'ep_length': ep_lengths.mean(), 'ep_return': ep_returns.mean(), 'ep_reward': (ep_returns / ep_lengths).mean(), - 'ep_constraint_violation': ep_constraint_violation.mean() + 'ep_constraint_violation': ep_constraint_violation.mean(), + 'step_time': results['elapsed_time'], }, step, prefix='stat') @@ -405,7 +404,8 @@ def log_step(self, 'ep_return': eval_ep_returns.mean(), 'ep_reward': (eval_ep_returns / eval_ep_lengths).mean(), 'constraint_violation': eval_constraint_violation.mean(), - 'mse': eval_mse.mean() + 'mse': eval_mse.mean(), + 'step_time': results['eval']['elapsed_time'], }, step, prefix='stat_eval') @@ -438,6 +438,5 @@ def env_reset(self, env, use_safe_reset): _, success = self.safety_filter.certify_action(unextended_obs, action, info) if not success: self.safety_filter.ocp_solver.reset() - _, success = self.safety_filter.certify_action(unextended_obs, action, info) 
return obs, info diff --git a/safe_control_gym/envs/benchmark_env.py b/safe_control_gym/envs/benchmark_env.py index 9b3bbbc1e..189a86739 100644 --- a/safe_control_gym/envs/benchmark_env.py +++ b/safe_control_gym/envs/benchmark_env.py @@ -78,7 +78,7 @@ def __init__(self, constraints=None, done_on_violation: bool = False, use_constraint_penalty=False, - constraint_penalty=-1, + constraint_penalty=1.0, # Disturbance. disturbances=None, adversary_disturbance=None, @@ -516,10 +516,10 @@ def after_step(self, obs, rew, done, info): if self.constraints is not None and self.use_constraint_penalty and self.constraints.is_violated(self, c_value=c_value): if self.rew_exponential: rew = np.log(rew) - rew += self.constraint_penalty + rew -= self.constraint_penalty rew = np.exp(rew) else: - rew += self.constraint_penalty + rew -= self.constraint_penalty # Terminate when reaching time limit, # but distinguish between done due to true termination or time limit reached