Updated functions in cal

NOAA-OWP · Jul 5, 2024 · 52ebd10 · 52ebd10
1 parent c9c39b3
commit 52ebd10
Show file tree

Hide file tree

Showing 5 changed files with 76 additions and 40 deletions.
diff --git a/python/runCalibValid/ngen_cal/src/ngen/cal/gwo_global_best.py b/python/runCalibValid/ngen_cal/src/ngen/cal/gwo_global_best.py
@@ -197,7 +197,6 @@ def optimize(
             self.swarm.pbest_pos, self.swarm.pbest_cost = compute_pbest(self.swarm)
             # Update leader and best cost and the corresponding positions 
             alpha, beta, delta = self.__get_abd(self.swarm.n_particles, self.swarm.current_cost)
-            # Compute best cost and position
             if verbose:
                 self.rep.hook(best_cost=self.swarm.best_cost)
             # save history

diff --git a/python/runCalibValid/ngen_cal/src/ngen/cal/model.py b/python/runCalibValid/ngen_cal/src/ngen/cal/model.py
@@ -107,7 +107,7 @@ def __init__(self, **kwargs):
         self._last_output_file = kwargs.pop('last_output_file', Path('{}_output_last_iteration.csv'.format(self.basinID)))
         self._best_output_file = kwargs.pop('best_output_file', Path('{}_output_best_iteration.csv'.format(self.basinID)))
         self._last_iter_file = kwargs.pop('last_iter_file', Path('{}_last_iteration.csv'.format(self.basinID)))
-        self._cost_iter_file = kwargs.pop('cost_iter_file', Path('{}_cost_iteration.csv'.format(self.basinID)))
+        self._cost_iter_file = kwargs.pop('cost_iter_file', Path('{}_cost_iter.csv'.format(self.basinID)))
 
         if self.evaluation_start and self.evaluation_stop:
             self._eval_range = (self.evaluation_start, self.evaluation_stop)
@@ -260,34 +260,41 @@ def write_last_iteration(self, i: int) -> None:
             log_file.writelines(['{}, '.format(self.basinID), '{} \n'.format(i)])
 
 
-    def write_cost_iter_file(self, i: int, calib_run_path: Path) -> None:
-        """Write global and local best cost function at each iteration into csv file.
+    def write_cost_iter_file(self, i: int, calib_run_path: Path) -> Path:
+        """Write the global best cost function value at each iteration to a csv file.
 
         Parameters
         ----------
         i : iteration
         calib_run_path : directory for calibration run
 
+        Returns:
+        ----------
+        cost_iter_file : Path
+
         """
-        workdirs = [os.path.join(calib_run_path, pnm) for pnm in os.listdir(calib_run_path) if os.path.isdir(os.path.join(calib_run_path, pnm))]
+        cost_iter_file = os.path.join(calib_run_path, self._cost_iter_file)
+
+        obj_file = glob.glob(os.path.join(calib_run_path, 'ngen*', '*objective_log.txt'))
         df_log = pd.DataFrame()
-        for wdir in workdirs:
-            logfile = os.path.join(wdir, '{}_objective_log.txt'.format(self.basinID))
-            if os.path.exists(logfile):
-                wlog = pd.read_csv(logfile)
-                wlog['agent'] = os.path.basename(wdir)
-                df_log = pd.concat([df_log, wlog], ignore_index=True)
-        df_cost=pd.DataFrame()
-        for iter in range(0, i+1):
-                df_log_iter = df_log[df_log['iteration']==iter][['iteration', 'best_objective_function', 'agent']]
-                best_cost = pd.DataFrame({'iteration': iter, 'global_best': df_log_iter['best_objective_function'].min(),
-                             'local_best': df_log_iter['best_objective_function'].mean()}, index=[0])
+        for f in obj_file:
+            alog = pd.read_csv(f)
+            df_log = pd.concat([df_log, alog], ignore_index=True)
+
+        df_cost = pd.DataFrame()
+        for n in range(0, i+1):
+            df_log_iter = df_log.query('iteration==@n')[['iteration', 'best_objective_function']]
+            if df_log_iter.shape[0] > 0:
+                best_cost = pd.DataFrame({'iteration': n, 'global_best': df_log_iter['best_objective_function'].min()}, index=[0])
                 df_cost = pd.concat([df_cost, best_cost], ignore_index=True)
-        calib_cost_iter_file = os.path.join(calib_run_path, self._cost_iter_file)
-        df_cost.to_csv(calib_cost_iter_file, index=False)
 
+        if df_cost.shape[0] == i+1 and df_log.shape[0] == len(obj_file)*i+1:
+            df_cost.to_csv(cost_iter_file, index=False)
+
+        return cost_iter_file
 
-    def write_hist_file(self, optimizer_result: 'SwarmOptimizer', agent: 'Agent', params_lst: list) -> None:
+
+    def write_hist_file(self, optimizer_result: 'SwarmOptimizer', agent: 'Agent', params: 'pd.DataFrame') -> Path:
         """Write cost and position history plus global best position into csv files.
 
         Parameters
@@ -296,21 +303,25 @@ def write_hist_file(self, optimizer_result: 'SwarmOptimizer', agent: 'Agent', pa
         agent : Agent object
         params_lst : Calibration parameter list
 
+        Returns:
+        ----------
+        cost_hist_file : Path
+
         """
         # Save best cost
         cost_hist = {"iteration": range(1, len(optimizer_result.cost_history) + 1),
                          "global_best": optimizer_result.cost_history,
-                         "local_best": optimizer_result.mean_pbest_history}
+                         "mean_local_best": optimizer_result.mean_pbest_history}
         if agent.algorithm=="gwo":
-            cost_hist.update({"leader_best": optimizer_result.mean_leader_history})
+            cost_hist.update({"mean_leader_best": optimizer_result.mean_leader_history})
         cost_hist = pd.DataFrame(cost_hist)
         cost_hist_file = os.path.join(agent.workdir, '{}_cost_hist.csv'.format(self.basinID))
         cost_hist.to_csv(cost_hist_file, index=False)
 
         # Save parameters of swarms
         pos_hist = pd.DataFrame()
         for i in range(len(optimizer_result.pos_history)):
-            pos_df = pd.DataFrame(optimizer_result.pos_history[i], columns=params_lst)
+            pos_df = pd.DataFrame(optimizer_result.pos_history[i], columns=params['param'].tolist())
             pos_df['agent'] = range(1, optimizer_result.swarm.n_particles + 1)
             pos_df['iteration'] = i + 1
             pos_hist = pd.concat([pos_hist, pos_df], ignore_index=True)
@@ -320,8 +331,8 @@ def write_hist_file(self, optimizer_result: 'SwarmOptimizer', agent: 'Agent', pa
         # Save best parameters
         best_pos = pd.DataFrame(optimizer_result.swarm.best_pos, columns=["global_best_params"])
         best_pos.reset_index(inplace=True, drop=True)
-        best_pos['param'] = params_lst
-        best_pos['model'] = list(agent.model_params.keys())*len(best_pos)
+        best_pos['param'] = params['param'].tolist()
+        best_pos['model'] = params['model'].tolist()
         best_pos_file = os.path.join(agent.workdir, '{}_global_best_params.csv'.format(self.basinID))
         best_pos.to_csv(best_pos_file, index=False)
 

diff --git a/python/runCalibValid/ngen_cal/src/ngen/cal/plot_functions.py b/python/runCalibValid/ngen_cal/src/ngen/cal/plot_functions.py
@@ -681,17 +681,21 @@ def plot_fdc_valid(
 
 
 def plot_cost_hist(
-    cost_hist_file: Union[str, os.PathLike], 
+    cost_file: Union[str, os.PathLike], 
     plotfile: Union[str, os.PathLike], 
     title: Optional[str] = None,
+    algorithm: Optional[str] = None,
+    calib_iter: Optional[bool] = False,
 ) -> None:
     """Plot convergence curve.
 
     Parameters:
     ----------
-    cost_hist_file : File containing global best and mean local best cost function values at each iteration
+    cost_file : File containing global best and mean local best cost at each iteration
     plotfile : Image file 
     title : Figure title 
+    algorithm : Optimization algorithm
+    calib_iter : Whether to plot at each iteration (True) or once after all iterations are finished (False), default False
 
     Returns:
     ----------
@@ -700,15 +704,26 @@ def plot_cost_hist(
     """
     print('---Plotting Convergence Curve for Global and Local Best Values---')
 
+
     # Read file
-    df = pd.read_csv(cost_hist_file)
+    df = pd.read_csv(cost_file)
     df.pop('iteration')
+
+    # Plot args
     colname = df.columns
-    cost_name = {'global_best': 'Global Best', 'local_best': 'Mean Local Best', 'leader_best': 'Mean Leader Best'}
+    cost_name = {'global_best': 'Global Best'}
+    cols = {'global_best': 'r'}
+    markers = {'global_best': 'o'}
+    if not calib_iter :
+        cost_name.update({'mean_local_best': 'Mean Local Best'})
+        cols.update({'mean_local_best': 'b'})
+        markers.update({'mean_local_best': '^'})
+        if algorithm == "gwo":
+            cost_name.update({'mean_leader_best': 'Mean Leader Best'})
+            cols.update({'mean_leader_best': 'y'})
+            markers.update({'mean_leader_best': 'd'})
 
     # Plot
-    cols = {'global_best': 'r', 'local_best': 'b', 'leader_best': 'y'}
-    markers = {'global_best': 'o', 'local_best': '^', 'leader_best': 'd'}
     fig, ax = plt.subplots(dpi=150, tight_layout=True)
     for x in colname:
         ax.plot(np.arange(0,len(df)), df[x], c=cols[x], label=cost_name[x], linewidth=1)

diff --git a/python/runCalibValid/ngen_cal/src/ngen/cal/plot_output.py b/python/runCalibValid/ngen_cal/src/ngen/cal/plot_output.py
@@ -259,4 +259,4 @@ def plot_cost_func(
     else:
         plotfile = os.path.join(agent.workdir, calibration_object.basinID + '_cost_hist.png')
     title  = algorithm.upper() + ' Convergence Curve ' + '\n' + calibration_object.station_name
-    plf.plot_cost_hist(cost_hist_file, plotfile, title)
+    plf.plot_cost_hist(cost_hist_file, plotfile, title, algorithm, calib_iter)
diff --git a/python/runCalibValid/ngen_cal/src/ngen/cal/search.py b/python/runCalibValid/ngen_cal/src/ngen/cal/search.py
@@ -128,9 +128,11 @@ def _evaluate(i: int, calibration_object: 'Evaluatable', agent: 'Agent', info: b
                                          agent.job.workdir, agent.calib_path_output, calibration_object.save_output_iter_flag)
     calibration_object.save_best_output(str(calibration_object.best_output_file), calibration_object.best_save_flag)
 
-    # Save global and local best cost, and plot
-    if len(glob.glob('*.log'))==1 and agent.algorithm !='dds':
-        calibration_object.write_cost_iter_file(i, agent.workdir) 
+    # Save global best cost and plot
+    if agent.algorithm !='dds':
+        cost_iter_file = calibration_object.write_cost_iter_file(i, agent.workdir)
+        if len(glob.glob('*.log'))==1:
+            plot_cost_func(calibration_object, agent, cost_iter_file, agent.algorithm, calib_iter=True)
 
     # Plot metrics, parameters and output
     if len(glob.glob('*.log'))==1 and i%calibration_object.save_plot_iter_freq==0:
@@ -139,6 +141,7 @@ def _evaluate(i: int, calibration_object: 'Evaluatable', agent: 'Agent', info: b
     # Save last iteration
     calibration_object.write_last_iteration(i)
 
+    return score
 
 def dds_update(iteration: int, inclusion_probability: float, calibration_object: 'Adjustable', agent: 'Agent') -> None:
     """ Dynamically dimensioned search optimization algorithm. 
@@ -352,7 +355,8 @@ def pso_search(start_iteration: int, iterations: int,  agent: 'Agent') -> None:
         if start_iteration == 0:
             if calibration_object.output is None:
                 print("Running {} to produce initial simulation".format(agent.cmd))
-                agent.update_config(start_iteration, calibration_object.df[[str(start_iteration), 'param', 'model']], calibration_object.id)
+                calibration_object.df_fill(start_iteration)
+                agent.update_config(start_iteration, calibration_object.adf[[str(start_iteration), 'param', 'model']], calibration_object.id)
                 _execute(agent, start_iteration)
             with pushd(agent.job.workdir):
                 _evaluate(0, calibration_object, agent, info=True)
@@ -385,11 +389,14 @@ def pso_search(start_iteration: int, iterations: int,  agent: 'Agent') -> None:
         print(calibration_object.df[['param','global_best']].set_index('param'))
 
         # Save and plot history  
-        cost_hist_file = calibration_object.write_hist_file(optimizer, agent, list(calibration_object.df['param']))
+        cost_hist_file = calibration_object.write_hist_file(optimizer, agent, calibration_object.df)
         plot_cost_func(calibration_object, agent, cost_hist_file, agent.algorithm)
 
         # Create configuration files for validation run
-        calibration_object.create_valid_realization_file(agent, calibration_object.df) 
+        calibration_object.df[str(iterations)] = calibration_object.df['global_best']
+        calibration_object.df_fill(iterations)
+        calibration_object.adf['global_best'] = calibration_object.adf[str(iterations)]
+        calibration_object.create_valid_realization_file(agent, calibration_object.adf)
 
         # Indicate completion 
         calibration_object.write_run_complete_file(agent.run_name, agent.workdir)
@@ -424,7 +431,8 @@ def gwo_search(start_iteration: int, iterations: int,  agent)->None:
         if start_iteration == 0:
             if calibration_object.output is None:
                 print("Running {} to produce initial simulation".format(agent.cmd))
-                agent.update_config(start_iteration, calibration_object.df[[str(start_iteration), 'param', 'model']], calibration_object.id)
+                calibration_object.df_fill(start_iteration)
+                agent.update_config(start_iteration, calibration_object.adf[[str(start_iteration), 'param', 'model']], calibration_object.id)
                 _execute(agent, start_iteration)
             with pushd(agent.job.workdir):
                 _evaluate(0, calibration_object, agent, info=True)
@@ -445,11 +453,14 @@ def gwo_search(start_iteration: int, iterations: int,  agent)->None:
         print(calibration_object.df[['param','global_best']].set_index('param'))
 
         # Save and plot history
-        cost_hist_file = calibration_object.write_hist_file(optimizer, agent, list(calibration_object.df['param']))
+        cost_hist_file = calibration_object.write_hist_file(optimizer, agent, calibration_object.df)
         plot_cost_func(calibration_object, agent, cost_hist_file, agent.algorithm)
 
         # Create configuration files for validation run
-        calibration_object.create_valid_realization_file(agent, calibration_object.df)
+        calibration_object.df[str(iterations)] = calibration_object.df['global_best']
+        calibration_object.df_fill(iterations)
+        calibration_object.adf['global_best'] = calibration_object.adf[str(iterations)]
+        calibration_object.create_valid_realization_file(agent, calibration_object.adf)
 
         # Indicate completion
         calibration_object.write_run_complete_file(agent.run_name, agent.workdir)