From 5d254c5084661d85e557b9fcbcdfac6a3d40e643 Mon Sep 17 00:00:00 2001
From: Federico-PizarroBejarano <federico.pizarrobejarano@mail.utoronto.ca>
Date: Mon, 7 Oct 2024 15:12:06 -0400
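Subject: [PATCH] Update MPSC experiments, plotting, and PPO step-time logging

- Enlarge the CPO quadrotor_3D network and training budget.
- Plot models in a fixed order, add a training step-time plot, and save
  figures only when they are not shown interactively.
- Pass the constraint penalty value through train_model.sbatch; the
  penalty is now a positive magnitude that is subtracted from the reward.
- Stop re-running certify_action after resetting the OCP solver in PPO.
- Log training and evaluation elapsed time as step_time under the stat
  and stat_eval prefixes.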

---
 .gitignore                                    |   2 +
 .../quadrotor_3D/cpo_quadrotor_3D.yaml        |  14 +-
 experiments/mpsc/plotting_results.py          | 135 +++++++++++++-----
 experiments/mpsc/train_all_models.sh          |  22 ++-
 experiments/mpsc/train_model.sbatch           |   4 +-
 safe_control_gym/controllers/ppo/ppo.py       |  23 ++-
 safe_control_gym/envs/benchmark_env.py        |   6 +-
 7 files changed, 137 insertions(+), 69 deletions(-)

diff --git a/.gitignore b/.gitignore
index f02ab33c7..6a294d7ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,8 @@ examples/pid/*data/
 #
 experiments/mpsc/temp-data/
 experiments/mpsc/unsafe_rl_temp_data/
+experiments/mpsc/models/rl_models/
+experiments/mpsc/results*/
 #
 results/
 z_docstring.py
diff --git a/experiments/mpsc/config_overrides/quadrotor_3D/cpo_quadrotor_3D.yaml b/experiments/mpsc/config_overrides/quadrotor_3D/cpo_quadrotor_3D.yaml
index 107d5f053..3a7b3a2ff 100644
--- a/experiments/mpsc/config_overrides/quadrotor_3D/cpo_quadrotor_3D.yaml
+++ b/experiments/mpsc/config_overrides/quadrotor_3D/cpo_quadrotor_3D.yaml
@@ -1,8 +1,8 @@
 algo: cpo
 algo_config:
   # Model args
-  hidden1: 128
-  hidden2: 128
+  hidden1: 256
+  hidden2: 256
 
   # Optim args
   discount_factor: 0.98
@@ -16,15 +16,15 @@ algo_config:
   cost_d: 0.0
 
   # Runner args
-  max_steps: 1000
-  num_epochs: 4000
-  value_epochs: 150
+  max_steps: 2000
+  num_epochs: 5000
+  value_epochs: 300
   eval_batch_size: 20
 
   # Misc
-  log_interval: 40
+  log_interval: 50
   save_interval: 0
   num_checkpoints: 0
-  eval_interval: 40
+  eval_interval: 50
   eval_save_best: True
   tensorboard: False
diff --git a/experiments/mpsc/plotting_results.py b/experiments/mpsc/plotting_results.py
index 4ae507a4c..ef247be99 100644
--- a/experiments/mpsc/plotting_results.py
+++ b/experiments/mpsc/plotting_results.py
@@ -12,8 +12,7 @@
 from safe_control_gym.safety_filters.mpsc.mpsc_utils import get_discrete_derivative, high_frequency_content
 from safe_control_gym.utils.plotting import load_from_logs
 
-plot = False
-save_figs = True
+plot = True  # If True, show figures interactively; if False, save them to ./results_mpsc/ instead
 
 U_EQs = {
     'cartpole': 0,
@@ -26,11 +25,12 @@
 
 
 def load_all_models(system, task, algo):
-    '''Loads the results of every MPSC cost function for a specific experiment with every algo.
+    '''Loads the results of every experiment.
 
     Args:
-        system (str): The system to be controlled.
-        task (str): The task to be completed (either 'stab' or 'track').
+        system (str): The system to be plotted.
+        task (str): The task to be plotted (either 'stab' or 'track').
+        algo (str): The controller to be plotted.
 
     Returns:
         all_results (dict): A dictionary containing all the results.
@@ -38,10 +38,10 @@ def load_all_models(system, task, algo):
 
     all_results = {}
 
-    for model in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/'):
+    for model in ordered_models:
         all_results[model] = []
-        for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'):
-            with open(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/{seed}.pkl', 'rb') as f:
+        for seed in os.listdir(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/'):
+            with open(f'./results_mpsc/{system}/{task}/{algo}/results_{system}_{task}_{algo}_{model}/{seed}', 'rb') as f:
                 all_results[model].append(pickle.load(f))
         consolidate_multiple_seeds(all_results, model)
 
@@ -497,9 +497,10 @@ def plot_model_comparisons(system, task, algo, data_extractor):
     '''Plots the constraint violations of every controller for a specific experiment.
 
     Args:
-        system (str): The system to be controlled.
-        task (str): The task to be completed (either 'stab' or 'track').
-        mpsc_cost_horizon (str): The cost horizon used by the smooth MPSC cost functions.
+        system (str): The system to be plotted.
+        task (str): The task to be plotted (either 'stab' or 'track').
+        algo (str): The controller to be plotted.
+        data_extractor (func): The function which extracts the desired data.
     '''
 
     all_results = load_all_models(system, task, algo)
@@ -507,11 +508,11 @@ def plot_model_comparisons(system, task, algo, data_extractor):
     fig = plt.figure(figsize=(16.0, 10.0))
     ax = fig.add_subplot(111)
 
-    labels = sorted(os.listdir(f'./models/rl_models/{system}/{task}/{algo}/'))
+    labels = ordered_models
 
     data = []
 
-    for model in labels:
+    for model in ordered_models:
         exp_data = all_results[model]
         data.append(data_extractor(exp_data))
 
@@ -522,24 +523,71 @@ def plot_model_comparisons(system, task, algo, data_extractor):
     ax.set_xticks(x, labels, weight='bold', fontsize=15, rotation=30, ha='right')
 
     medianprops = dict(linestyle='--', linewidth=2.5, color='black')
-    bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels))
-
-    colors = {'mpsf_sr_pen_1': 'lightgreen', 'mpsf_sr_pen_10': 'limegreen', 'mpsf_sr_pen_100': 'forestgreen', 'mpsf_sr_pen_1000': 'darkgreen', 'none': 'cornflowerblue', 'none_cpen': 'plum'}
+    bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False)
 
     for patch, color in zip(bplot['boxes'], colors.values()):
         patch.set_facecolor(color)
 
     fig.tight_layout()
 
-    if data_extractor != extract_reward_cert:
-        ax.set_ylim(ymin=0)
     ax.yaxis.grid(True)
 
     if plot is True:
         plt.show()
-    if save_figs:
+    else:
         image_suffix = data_extractor.__name__.replace('extract_', '')
-        fig.savefig(f'./results_mpsc/{system}/{task}/{algo}/graphs/{system}_{task}_{image_suffix}.png', dpi=300)
+        fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300)
+    plt.close()
+
+
+def plot_step_time(system, task, algo):
+    '''Plots the training time per step of every model for a specific experiment.
+
+    Args:
+        system (str): The system to be plotted.
+        task (str): The task to be plotted (either 'stab' or 'track').
+        algo (str): The controller to be plotted.
+    '''
+
+    all_results = {}
+    for model in ordered_models:
+        all_results[model] = []
+        for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'):
+            all_results[model].append(load_from_logs(f'./models/rl_models/{system}/{task}/{algo}/{model}/{seed}/logs/'))
+
+    fig = plt.figure(figsize=(16.0, 10.0))
+    ax = fig.add_subplot(111)
+
+    labels = ordered_models
+
+    data = []
+
+    for model in ordered_models:
+        datum = np.array([values['stat/step_time'][3] for values in all_results[model]]).flatten()
+        data.append(datum)
+
+    ylabel = 'Training Time per Step [ms]'
+    ax.set_ylabel(ylabel, weight='bold', fontsize=45, labelpad=10)
+
+    x = np.arange(1, len(labels) + 1)
+    ax.set_xticks(x, labels, weight='bold', fontsize=15, rotation=30, ha='right')
+
+    medianprops = dict(linestyle='--', linewidth=2.5, color='black')
+    bplot = ax.boxplot(data, patch_artist=True, labels=labels, medianprops=medianprops, widths=[0.75] * len(labels), showfliers=False)
+
+    for patch, color in zip(bplot['boxes'], colors.values()):
+        patch.set_facecolor(color)
+
+    fig.tight_layout()
+
+    ax.set_ylim(ymin=0)
+    ax.yaxis.grid(True)
+
+    if plot is True:
+        plt.show()
+    else:
+        image_suffix = 'step_time'
+        fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300)
     plt.close()
 
 
@@ -571,43 +619,40 @@ def plot_all_logs(system, task, algo):
     '''Plots comparative plots of all the logs.
 
     Args:
-        system (str): The system to be controlled.
-        task (str): The task to be completed (either 'stab' or 'track').
-        mpsc_cost_horizon (str): The cost horizon used by the smooth MPSC cost functions.
+        system (str): The system to be plotted.
+        task (str): The task to be plotted (either 'stab' or 'track').
+        algo (str): The controller to be plotted.
     '''
     all_results = {}
 
-    for model in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/'):
+    for model in ordered_models:
         all_results[model] = []
         for seed in os.listdir(f'./models/rl_models/{system}/{task}/{algo}/{model}/'):
             all_results[model].append(load_from_logs(f'./models/rl_models/{system}/{task}/{algo}/{model}/{seed}/logs/'))
 
-    for key in all_results['none'][0].keys():
-        plot_log(system, task, algo, key, all_results)
+    for key in all_results[ordered_models[0]][0].keys():
+        if key == 'stat_eval/ep_return':
+            plot_log(key, all_results)
+        if key == 'stat/constraint_violation':
+            plot_log(key, all_results)
 
 
-def plot_log(system, task, algo, key, all_results):
+def plot_log(key, all_results):
     '''Plots a comparative plot of the log 'key'.
 
     Args:
-        system (str): The system to be controlled.
-        task (str): The task to be completed (either 'stab' or 'track').
-        mpsc_cost_horizon (str): The cost horizon used by the smooth MPSC cost functions.
         key (str): The name of the log to be plotted.
         all_results (dict): A dictionary of all the logged results for all models.
     '''
     fig = plt.figure(figsize=(16.0, 10.0))
     ax = fig.add_subplot(111)
 
-    labels = sorted(all_results.keys())
-    labels = [label for label in labels if '_es' not in label]
+    labels = ordered_models
 
-    colors = {'mpsf_sr_pen_1': 'lightgreen', 'mpsf_sr_pen_10': 'limegreen', 'mpsf_sr_pen_100': 'forestgreen', 'mpsf_sr_pen_1000': 'darkgreen', 'none': 'cornflowerblue', 'none_cpen': 'plum'}
-
-    for model in labels:
+    for model, label in zip(ordered_models, labels):
         x = all_results[model][0][key][1] / 1000
         all_data = np.array([values[key][3] for values in all_results[model]])
-        ax.plot(x, np.mean(all_data, axis=0), label=model, color=colors[model])
+        ax.plot(x, np.mean(all_data, axis=0), label=label, color=colors[model])
         ax.fill_between(x, np.min(all_data, axis=0), np.max(all_data, axis=0), alpha=0.3, edgecolor=colors[model], facecolor=colors[model])
 
     ax.set_ylabel(key, weight='bold', fontsize=45, labelpad=10)
@@ -619,14 +664,25 @@ def plot_log(system, task, algo, key, all_results):
 
     if plot is True:
         plt.show()
-    if save_figs:
+    else:
         image_suffix = key.replace('/', '__')
-        fig.savefig(f'./results_mpsc/{system}/{task}/{algo}/graphs/{system}_{task}_{image_suffix}.png', dpi=300)
+        fig.savefig(f'./results_mpsc/{image_suffix}.png', dpi=300)
     plt.close()
 
 
 if __name__ == '__main__':
-    ordered_costs = ['one_step', 'regularized', 'precomputed']
+    ordered_models = ['none', 'none_cpen_0.01', 'none_cpen_0.1', 'none_cpen_1', 'mpsf_sr_pen_0.1', 'mpsf_sr_pen_1', 'mpsf_sr_pen_10', 'mpsf_sr_pen_100']
+
+    colors = {
+        'none': 'cornflowerblue',
+        'none_cpen_0.01': 'plum',
+        'none_cpen_0.1': 'mediumorchid',
+        'none_cpen_1': 'darkorchid',
+        'mpsf_sr_pen_0.1': 'lightgreen',
+        'mpsf_sr_pen_1': 'limegreen',
+        'mpsf_sr_pen_10': 'forestgreen',
+        'mpsf_sr_pen_100': 'darkgreen',
+    }
 
     def extract_rate_of_change_of_inputs(results_data, certified=True):
         return extract_rate_of_change(results_data, certified, order=1, mode='input')
@@ -682,6 +738,7 @@ def extract_length_uncert(results_data, certified=False):
         algo_name = sys.argv[3]
 
     plot_all_logs(system_name, task_name, algo_name)
+    plot_step_time(system_name, task_name, algo_name)
     plot_model_comparisons(system_name, task_name, algo_name, extract_magnitude_of_corrections)
     plot_model_comparisons(system_name, task_name, algo_name, extract_percent_magnitude_of_corrections)
     plot_model_comparisons(system_name, task_name, algo_name, extract_max_correction)
diff --git a/experiments/mpsc/train_all_models.sh b/experiments/mpsc/train_all_models.sh
index df89200d9..a3cdcb8fd 100755
--- a/experiments/mpsc/train_all_models.sh
+++ b/experiments/mpsc/train_all_models.sh
@@ -2,13 +2,21 @@
 for SYS in quadrotor_3D; do
     for ALGO in ppo; do
         for TASK in track; do
-            for SEED in 42 62 821 99 4077; do # 1102 1014 14 960406 2031; do
-                sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 1 $SEED #mpsf_sr_pen_1
-                sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 10 $SEED #mpsf_sr_pen_10
-                sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 100 $SEED #mpsf_sr_pen_100
-                sbatch train_model.sbatch mpsf True True $SYS $TASK $ALGO False 1000 $SEED #mpsf_sr_pen_1000
-                sbatch train_model.sbatch none False False $SYS $TASK $ALGO False False $SEED #none
-                sbatch train_model.sbatch none False False $SYS $TASK $ALGO True False $SEED #none_cpen
+            for SEED in 42 62 821 99 4077; do
+                # MPSF Ablation
+                ./train_model.sbatch none False False $SYS $TASK $ALGO False False $SEED #none
+                ./train_model.sbatch none False True  $SYS $TASK $ALGO False 1     $SEED #none_pen_1
+                ./train_model.sbatch none True  False $SYS $TASK $ALGO False False $SEED #none_sr
+                ./train_model.sbatch none True  True  $SYS $TASK $ALGO False 1     $SEED #none_sr_pen_1
+                ./train_model.sbatch mpsf False False $SYS $TASK $ALGO False False $SEED #mpsf
+                ./train_model.sbatch mpsf False True  $SYS $TASK $ALGO False 1     $SEED #mpsf_pen_1
+                ./train_model.sbatch mpsf True  False $SYS $TASK $ALGO False False $SEED #mpsf_sr
+                ./train_model.sbatch mpsf True  True  $SYS $TASK $ALGO False 1     $SEED #mpsf_sr_pen_1
+
+                # Constr Pen
+                ./train_model.sbatch none False False $SYS $TASK $ALGO True  0.01  $SEED #none_cpen_0.01
+                ./train_model.sbatch none False False $SYS $TASK $ALGO True  0.1   $SEED #none_cpen_0.1
+                ./train_model.sbatch none False False $SYS $TASK $ALGO True  1     $SEED #none_cpen_1
             done
         done
     done
diff --git a/experiments/mpsc/train_model.sbatch b/experiments/mpsc/train_model.sbatch
index 2cfb9e9ce..2f736f798 100755
--- a/experiments/mpsc/train_model.sbatch
+++ b/experiments/mpsc/train_model.sbatch
@@ -70,8 +70,10 @@ fi
 
 if [ "$8" = False ]; then
     SF_PEN_TAG=''
+    CONSTR_PEN_VAL=0
 else
     SF_PEN_TAG="_$8"
+    CONSTR_PEN_VAL=$8
 fi
 
 if [ -z "$9" ]; then
@@ -103,6 +105,7 @@ python3 train_rl.py \
     --kv_overrides \
         task_config.init_state=None \
         task_config.use_constraint_penalty=${CONSTR_PEN} \
+        task_config.constraint_penalty=${CONSTR_PEN_VAL} \
         sf_config.cost_function=${MPSC_COST} \
         sf_config.mpsc_cost_horizon=${MPSC_COST_HORIZON} \
         sf_config.decay_factor=${DECAY_FACTOR} \
@@ -116,4 +119,3 @@ python3 train_rl.py \
         sf_config.seed=${SEED} \
 
 ./mpsc_experiment.sh $TAG $SYS $TASK $ALGO $SEED
-# python plotting_results.py $SYS $TASK $ALGO
diff --git a/safe_control_gym/controllers/ppo/ppo.py b/safe_control_gym/controllers/ppo/ppo.py
index 6cece223e..90bd65cd6 100644
--- a/safe_control_gym/controllers/ppo/ppo.py
+++ b/safe_control_gym/controllers/ppo/ppo.py
@@ -232,6 +232,7 @@ def run(self,
         ep_returns, ep_lengths = [], []
         frames = []
         total_return = 0
+        start = time.time()
         while len(ep_returns) < n_episodes:
             action = self.select_action(obs=obs, info=info)
 
@@ -244,9 +245,6 @@ def run(self,
                 action = env.normalize_action(certified_action)
             else:
                 self.safety_filter.ocp_solver.reset()
-                certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
-                if success:
-                    action = self.env.envs[0].normalize_action(certified_action)
 
             action = np.atleast_2d(np.squeeze([action]))
             obs, rew, done, info = env.step(action)
@@ -268,7 +266,11 @@ def run(self,
         # Collect evaluation results.
         ep_lengths = np.asarray(ep_lengths)
         ep_returns = np.asarray(ep_returns)
-        eval_results = {'ep_returns': ep_returns, 'ep_lengths': ep_lengths}
+        eval_results = {
+            'ep_returns': ep_returns,
+            'ep_lengths': ep_lengths,
+            'elapsed_time': time.time() - start
+        }
         if len(frames) > 0:
             eval_results['frames'] = frames
         # Other episodic stats from evaluation env.
@@ -301,9 +303,6 @@ def train_step(self):
                     action = self.env.envs[0].normalize_action(certified_action)
                 else:
                     self.safety_filter.ocp_solver.reset()
-                    certified_action, success = self.safety_filter.certify_action(unextended_obs, physical_action, info)
-                    if success and self.filter_train_actions is True:
-                        action = self.env.envs[0].normalize_action(certified_action)
 
             action = np.atleast_2d(np.squeeze([action]))
             next_obs, rew, done, info = self.env.step(action)
@@ -363,8 +362,7 @@ def log_step(self,
         self.logger.add_scalars(
             {
                 'step': step,
-                'step_time': results['elapsed_time'],
-                'progress': step / self.max_env_steps
+                'progress': step / self.max_env_steps,
             },
             step,
             prefix='time',
@@ -387,7 +385,8 @@ def log_step(self,
                 'ep_length': ep_lengths.mean(),
                 'ep_return': ep_returns.mean(),
                 'ep_reward': (ep_returns / ep_lengths).mean(),
-                'ep_constraint_violation': ep_constraint_violation.mean()
+                'ep_constraint_violation': ep_constraint_violation.mean(),
+                'step_time': results['elapsed_time'],
             },
             step,
             prefix='stat')
@@ -405,7 +404,8 @@ def log_step(self,
                     'ep_return': eval_ep_returns.mean(),
                     'ep_reward': (eval_ep_returns / eval_ep_lengths).mean(),
                     'constraint_violation': eval_constraint_violation.mean(),
-                    'mse': eval_mse.mean()
+                    'mse': eval_mse.mean(),
+                    'step_time': results['eval']['elapsed_time'],
                 },
                 step,
                 prefix='stat_eval')
@@ -438,6 +438,5 @@ def env_reset(self, env, use_safe_reset):
                 _, success = self.safety_filter.certify_action(unextended_obs, action, info)
                 if not success:
                     self.safety_filter.ocp_solver.reset()
-                    _, success = self.safety_filter.certify_action(unextended_obs, action, info)
 
         return obs, info
diff --git a/safe_control_gym/envs/benchmark_env.py b/safe_control_gym/envs/benchmark_env.py
index 9b3bbbc1e..189a86739 100644
--- a/safe_control_gym/envs/benchmark_env.py
+++ b/safe_control_gym/envs/benchmark_env.py
@@ -78,7 +78,7 @@ def __init__(self,
                  constraints=None,
                  done_on_violation: bool = False,
                  use_constraint_penalty=False,
-                 constraint_penalty=-1,
+                 constraint_penalty=1.0,
                  # Disturbance.
                  disturbances=None,
                  adversary_disturbance=None,
@@ -516,10 +516,10 @@ def after_step(self, obs, rew, done, info):
             if self.constraints is not None and self.use_constraint_penalty and self.constraints.is_violated(self, c_value=c_value):
                 if self.rew_exponential:
                     rew = np.log(rew)
-                    rew += self.constraint_penalty
+                    rew -= self.constraint_penalty
                     rew = np.exp(rew)
                 else:
-                    rew += self.constraint_penalty
+                    rew -= self.constraint_penalty
 
         # Terminate when reaching time limit,
         # but distinguish between done due to true termination or time limit reached