From 77141072d6b3cd49a0f4da7b53faeab7cfe36c35 Mon Sep 17 00:00:00 2001
From: MohamedElgammal
Date: Mon, 5 Aug 2024 15:40:03 -0400
Subject: [PATCH 1/8] Adding a script that controls tuning runs with multiple
 tuned parameters and efficiently parses the results

---
 vtr_flow/scripts/tuning_runs/README.md       |  24 +++
 vtr_flow/scripts/tuning_runs/control_runs.py | 204 +++++++++++++++++++
 2 files changed, 228 insertions(+)
 create mode 100644 vtr_flow/scripts/tuning_runs/README.md
 create mode 100755 vtr_flow/scripts/tuning_runs/control_runs.py

diff --git a/vtr_flow/scripts/tuning_runs/README.md b/vtr_flow/scripts/tuning_runs/README.md
new file mode 100644
index 00000000000..eebbcdf2670
--- /dev/null
+++ b/vtr_flow/scripts/tuning_runs/README.md
@@ -0,0 +1,24 @@
+A script used to run tuning experiments with multiple parameters.
+
+Steps to use:
+=============
+ 1) Edit the first section of the script: set the `params_dict` dictionary to the parameters you want to sweep and the values you want to try for each. If you want the resulting spreadsheet to include only specific metrics, set the `keep_metrics_only` variable to `True` and list the metrics you care about in `parsed_metrics`. If you want the full parsed result sheet, set `keep_metrics_only` to `False`.
+
+ 2) Run the script as follows:
+```
+python control_runs.py --generate <path_to_task>
+```
+
+This will edit the `config.txt` file of the task, adding a `script_params_list_add` line for each combination of the input parameters.
+
+ 3) Launch the task using the `run_vtr_task.py` script.
+ 4) When the run is done, run the script again to parse the results:
+```
+python control_runs.py --parse <path_to_task>
+```
+
+The script will generate the following files in the `runXXX` directory of the task:
+ - `full_res.csv`: exactly matches `parse_results.txt`, but in CSV format
+ - `avg_seed.csv`: averages the results of each circuit with one set of parameters over the different seed values
+ - `geomean_res.csv`: geometrically averages the results of all the circuits over the same set of parameters
+ - `summary.xlsx`: merges all the previously mentioned sheets into a single spreadsheet

diff --git a/vtr_flow/scripts/tuning_runs/control_runs.py b/vtr_flow/scripts/tuning_runs/control_runs.py
new file mode 100755
index 00000000000..5588a2a937b
--- /dev/null
+++ b/vtr_flow/scripts/tuning_runs/control_runs.py
@@ -0,0 +1,204 @@
+import itertools
+import os
+import sys
+import csv
+import pandas as pd
+import numpy as np
+from scipy import stats
+
+# Define the global dictionary of swept parameters and the values to try for each
+params_dict = {
+    "--seed": [1, 2],
+    "--place_algorithm": ["criticality_timing"],
+    "--place_agent_epsilon": [0.3],
+}
+
+# Set to True if you only care about specific metrics
+keep_metrics_only = True
+parsed_metrics = ["num_io", "num_LAB"]
+
+
+def safe_gmean(series):
+    # Geometric mean that ignores zero entries (the gmean of a series containing 0 is 0)
+    series = series.replace({0: np.nan})
+    return stats.gmean(series.dropna())
+
+
+def generate_combinations(params_dict):
+    # Emit one config line per point in the Cartesian product of all parameter values
+    keys = list(params_dict.keys())
+    values = list(params_dict.values())
+    combinations = list(itertools.product(*values))
+
+    lines = []
+    for combination in combinations:
+        params_str = " ".join(f"{key} {value}" for key, value in zip(keys, combination))
+        lines.append(f"script_params_list_add={params_str}\n")
+    return lines
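+
+# Illustrative example (comment only, not executed): with the params_dict
+# above, generate_combinations(params_dict) yields the two lines
+#   script_params_list_add=--seed 1 --place_algorithm criticality_timing --place_agent_epsilon 0.3
+#   script_params_list_add=--seed 2 --place_algorithm criticality_timing --place_agent_epsilon 0.3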
path.") + sys.exit(1) + + largest_run_dir = max(run_dirs, key=lambda d: int(d[3:])) + largest_run_path = os.path.join(input_path, largest_run_dir) + + # Path to parse_results.txt and full_res.csv + parse_results_path = os.path.join(largest_run_path, "parse_results.txt") + full_res_csv_path = os.path.join(largest_run_path, "full_res.csv") + + if not os.path.exists(parse_results_path): + print(f"{parse_results_path} not found.") + sys.exit(1) + + # Read the parse_results.txt file and write to full_res.csv + with open(parse_results_path, "r") as txt_file, open(full_res_csv_path, "w", newline='') as csv_file: + reader = csv.reader(txt_file, delimiter='\t') + writer = csv.writer(csv_file) + + headers = next(reader) + script_params_index = headers.index("script_params") + + # Create new headers with params_dict keys + new_headers = headers[:script_params_index] + list(params_dict.keys()) + headers[script_params_index + 1:] + writer.writerow(new_headers) + + for row in reader: + script_params_value = row[script_params_index] + script_params_dict = parse_script_params(script_params_value, params_dict) + new_row = row[:script_params_index] + [script_params_dict.get(key, '') for key in params_dict.keys()] + row[script_params_index + 1:] + writer.writerow(new_row) + + print(f"Converted {parse_results_path} to {full_res_csv_path}") + + # Generate avg_seed.csv if --seed column exists + generate_avg_seed_csv(full_res_csv_path, largest_run_path) + print(f"Generated average seed results") + + # Generate gmean_res.csv + generate_geomean_res_csv(os.path.join(largest_run_path, "avg_seed.csv"), largest_run_path, params_dict) + print(f"Generated geometric average results over all the circuits") + + generate_xlsx(largest_run_path) + print(f"Generated xlsx that merges all the result csv files") + +def generate_xlsx(largest_run_path): + csv_files = [os.path.join(largest_run_path, "full_res.csv"), + os.path.join(largest_run_path, "avg_seed.csv"), + os.path.join(largest_run_path, "geomean_res.csv")] + sheet_names = ["Full res", "Avg. 
seeds", "Summary"] + output_excel_file = os.path.join(largest_run_path, "summary.xlsx") + # Create an Excel writer object + with pd.ExcelWriter(output_excel_file) as writer: + for csv_file, sheet_name in zip(csv_files, sheet_names): + # Read each CSV file + df = pd.read_csv(csv_file) + + # Write each DataFrame to a different sheet + df.to_excel(writer, sheet_name=sheet_name, index=False) + +def parse_script_params(script_params, params_dict): + parsed_params = {key: '' for key in params_dict.keys()} + + parts = script_params.split('_') + i = 0 + + while i < len(parts): + for key in params_dict.keys(): + key_parts = key.split('_') + key_length = len(key_parts) + + if parts[i:i+key_length] == key_parts: + value_parts = [] + j = i + key_length + + while j < len(parts) and not any(parts[j:j+len(k.split('_'))] == k.split('_') for k in params_dict.keys()): + value_parts.append(parts[j]) + j += 1 + + parsed_params[key] = '_'.join(value_parts) + i = j - 1 + break + + i += 1 + + return parsed_params + +def generate_avg_seed_csv(full_res_csv_path, output_dir): + + df = pd.read_csv(full_res_csv_path) + + if keep_metrics_only: + col_to_keep = ['circuit', 'arch'] + col_to_keep.extend(list(params_dict.keys())) + col_to_keep.extend(parsed_metrics) + df = df.drop(columns=[col for col in df.columns if col not in col_to_keep]) + + # Check if '--seed' column is present + if '--seed' in df.columns: + # Determine the grouping keys: ['circuit', 'arch'] + keys from params_dict that are present in the dataframe + grouping_keys = ['circuit', 'arch'] + [key for key in params_dict.keys() if key in df.columns and key != "--seed"] + + # Group by specified keys and compute the mean for numeric columns + df_grouped = df.groupby(grouping_keys).mean(numeric_only=True).reset_index() + + # Drop the '--seed' column if it exists + if '--seed' in df_grouped.columns: + df_grouped.drop(columns=['--seed'], inplace=True) + else: + df_grouped = df + + # Save the resulting dataframe to a CSV file + avg_seed_csv_path = os.path.join(output_dir, "avg_seed.csv") + df_grouped.to_csv(avg_seed_csv_path, index=False) + +def generate_geomean_res_csv(full_res_csv_path, output_dir, params_dict): + df = pd.read_csv(full_res_csv_path) + + param_columns = [key for key in params_dict.keys() if key != '--seed'] + non_param_columns = [col for col in df.columns if col not in param_columns] + + geomean_df = df.groupby(param_columns).agg( + {col: (lambda x: '' if x.dtype == 'object' else safe_gmean(x)) for col in non_param_columns} + ).reset_index() + + geomean_df.drop(columns=['circuit'], inplace=True) + geomean_df.drop(columns=['arch'], inplace=True) + + geomean_res_csv_path = os.path.join(output_dir, "geomean_res.csv") + geomean_df.to_csv(geomean_res_csv_path, index=False) + +def main(): + if len(sys.argv) < 3: + print("Usage: script.py