TimotheeMathieu · TimotheeMathieu · Dec 23, 2023 · Dec 26, 2023 · Dec 26, 2023 · Dec 27, 2023
diff --git a/adastop/benchopt.py b/adastop/benchopt.py
@@ -0,0 +1,15 @@
+import pandas as pd
+
+
+def process_benchopt(file):
+    """
+    Read a benchopt parquet file and return one column per solver, one row per repetition (single dataset assumed).
+    """
+    df = pd.read_parquet(file)[["solver_name", "objective_value", "idx_rep"]]
+    solvers = df["solver_name"].unique()
+    df_ret = {name: [] for name in solvers}
+    for rep in df["idx_rep"].unique():
+        for solver in solvers:
+            df_rep_solver = df.loc[(df["solver_name"] == solver) & (df["idx_rep"] == rep)]
+            df_ret[solver].append(df_rep_solver["objective_value"].iloc[-1])  # last row; the only metric column kept
+    return pd.DataFrame(df_ret)
diff --git a/adastop/cli.py b/adastop/cli.py
@@ -1,41 +1,25 @@
 import click
 import pickle
+import yaml
 import os
 from pathlib import Path
+import subprocess
 import pandas as pd
 import numpy as np
 import matplotlib.pyplot as plt
-
+from .benchopt import process_benchopt
 from .compare_agents import MultipleAgentsComparator
 
-LITTER_FILE = ".adastop_comparator.pkl"
 
-@click.group()
-@click.pass_context
-def adastop(ctx):
-    """
-    Program to perform adaptive stopping algorithm using csv file intput_file.
+LITTER_FILE = ".adastop_comparator.pkl"
 
-    Use adastop sub-command --help to have help for a specific sub-command
-    """
-    pass
 
-@adastop.command()
-@click.option("--n-groups", default=5, show_default=True, help="Number of groups.")
-@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.")
-@click.option("--alpha", default=0.05, show_default=True, help="Type I error.")
-@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.")
-@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.")
-@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.")
-@click.argument('input_file',required = True, type=str)
-@click.pass_context
-def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
+def compare_data(path_lf, df, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
     """
-    Perform one step of adaptive stopping algorithm using csv file intput_file.
+    Perform one step of adaptive stopping algorithm using the dataframe df.
     At first call, the comparator will be initialized with the arguments passed and then it will be saved to a save file in `.adastop_comparator.pkl`.
     """
-    path_lf = Path(input_file).parent.absolute() / LITTER_FILE
-    df = pd.read_csv(input_file, index_col=0)
+
     n_fits_per_group = len(df) 
     n_agents = len(df.columns)
     if compare_to_first:
@@ -48,7 +32,13 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar
         with open(path_lf, 'rb') as fp:
             comparator = pickle.load(fp)
 
-        Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in df.columns]
+        names = []
+        for i in range(len(comparator.agent_names)):
+            if i in comparator.current_comparisons.ravel():
+                names.append(comparator.agent_names[i])
+
+
+        Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in names]
         if len(Z[0]) > comparator.K * n_fits_per_group:
             raise ValueError('Error: you tried to use more group than what was initially declared, this is not allowed by the theory.')
         assert "continue" in list(comparator.decisions.values()), "Test finished at last iteration."
@@ -57,13 +47,15 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar
         comparator = MultipleAgentsComparator(n_fits_per_group, n_groups,
                                               n_permutations, comparisons,
                                               alpha, beta, seed)
-        Z = [df[agent].values for agent in df.columns]
+        names = df.columns
+
+        Z = [df[agent].values for agent in names]
 
-    data = {df.columns[i] : Z[i] for i in range(len(df.columns))}
+    data = {names[i] : Z[i] for i in range(len(names))}
     # recover also the data of agent that were decided.
     if comparator.agent_names is not None:
         for agent in comparator.agent_names:
-            if agent not in df.columns:
+            if agent not in data.keys():
                 data[agent]=comparator.eval_values[agent]
 
     comparator.partial_compare(data, False)
@@ -86,6 +78,100 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar
         pickle.dump(comparator, fp)
         click.echo("Comparator Saved")
 
+
+@click.group()
+@click.pass_context
+def adastop(ctx):
+    """
+    Program to perform adaptive stopping algorithm using csv file input_file.
+
+    Use adastop sub-command --help to have help for a specific sub-command
+    """
+    pass
+
+@adastop.command()
+@click.option("--n-groups", default=5, show_default=True, help="Number of groups.")
+@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.")
+@click.option("--alpha", default=0.05, show_default=True, help="Type I error.")
+@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.")
+@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.")
+@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.")
+@click.argument('input_file',required = True, type=str)
+@click.pass_context
+def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
+    """
+    Perform one step of adaptive stopping algorithm using csv file input_file.
+    At first call, the comparator will be initialized with the arguments passed and then it will be saved to a save file in `.adastop_comparator.pkl`.
+    """
+    path_lf = Path(input_file).parent.absolute() / LITTER_FILE  # save file sits in the same directory as input_file
+    df = pd.read_csv(input_file, index_col=0)
+    compare_data(path_lf, df,  n_groups, n_permutations, alpha, beta, seed, compare_to_first)
+
+
+@adastop.command()
+@click.option("--n-groups", default=5, show_default=True, help="Number of groups.")
+@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.")
+@click.option("--alpha", default=0.05, show_default=True, help="Type I error.")
+@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.")
+@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.")
+@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.")
+@click.option("--size-group", default=6, show_default=True, help="Number of fits per group (passed to benchopt run as -r).")
+@click.argument('config_file',required = True, type=str)
+@click.pass_context
+def compare_benchopt(ctx, config_file, size_group, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
+    """
+    Perform one step of computing benchmark and then adaptive stopping algorithm.
+    The benchmark is supposed to be in the current directory.
+
+    The solvers are run with `benchopt run`, the file
+    `outputs/adastop_result_file_<k>.parquet` is converted to a csv next to it and
+    one comparison step is performed on that data. When a saved comparator with
+    k > 0 exists, only the solvers still listed in its current comparisons are run.
+
+    WARNING: still experimental.
+    """
+    path_lf = Path(config_file).parent.absolute() / LITTER_FILE
+    # k is read from the saved comparator; it stays 0 when there is no save file yet.
+    k = 0
+    if os.path.isfile(path_lf):
+        # NOTE: unpickling can run arbitrary code; the save file must come from a trusted run.
+        with open(path_lf, 'rb') as fp:
+            comparator = pickle.load(fp)
+        k = comparator.k
+
+    output_name = "adastop_result_file_"+str(k)
+    parquet_file = "outputs/"+output_name+".parquet"
+    csv_file = "outputs/"+output_name+".csv"
+
+    # An existing csv is taken to mean that benchopt was already run for this k:
+    # the run is skipped and the parquet file is simply processed again below.
+    if not os.path.isfile(csv_file):
+        if k > 0:
+            # keep only the solvers whose index still appears in a current comparison.
+            undecided_solvers = [name for i, name in enumerate(comparator.agent_names)
+                                 if i in comparator.current_comparisons.ravel()]
+            with open(config_file, 'r') as file:
+                config = yaml.safe_load(file)
+            config['solver'] = undecided_solvers
+
+            run_config = "/tmp/config_benchopt.yml"
+            with open(run_config, 'w') as file:
+                yaml.dump(config, file)  # returns None when given a stream, so config is not rebound
+
+            print("Doing comparisons for "+str(len(undecided_solvers))+ " solvers: "+", ".join(undecided_solvers))
+        else:
+            # initially, run everything
+            run_config = config_file
+        subprocess.check_output(["benchopt", "run", ".", "--config", run_config,
+                                 "--env", "-r", str(size_group),
+                                 "--output", output_name])
+
+    df = process_benchopt(parquet_file)
+    df.to_csv(csv_file)
+
+    compare_data(path_lf, df,  n_groups, n_permutations, alpha, beta, seed, compare_to_first)
+
+
 @adastop.command()
 @click.argument('folder',required = True, type=str)
 @click.pass_context

diff --git a/adastop/compare_agents.py b/adastop/compare_agents.py
@@ -270,6 +270,8 @@ def partial_compare(self, eval_values, verbose=True):
             admissible_values_sup = values[
                 self.level_spent + icumulative_probas <= clevel
             ]
+            if len(np.unique(values)) < 1/clevel:
+                print("WARNING: too many values are equal, or size of group too small, the test may not be precise.")
 
             if len(admissible_values_sup) > 0:
                 bk_sup = admissible_values_sup[0]  # the minimum admissible value
@@ -324,7 +326,6 @@ def partial_compare(self, eval_values, verbose=True):
                                         - np.mean(Z[comp[1]][: ((k + 1) * self.n[comp[1]])]
                                   )
                                     )
-
             if Tmax > bk_sup:
                 id_reject = np.arange(len(current_decisions))[current_decisions== "continue"][imax]
                 current_decisions[id_reject] = "reject"

diff --git a/adastop/data_processing.py b/adastop/data_processing.py
@@ -0,0 +1,15 @@
+import pandas as pd
+
+
+def process_benchopt(file):
+    """
+    Build a DataFrame with one column per solver from a benchopt parquet file (for now, only one dataset is supposed).
+    """
+    df = pd.read_parquet(file)
+    df= df[["solver_name",'objective_value','idx_rep']]
+    df_ret = { name : [] for name in df["solver_name"].unique()}
+    for rep in df["idx_rep"].unique():
+        for solver in df["solver_name"].unique():
+            df_rep_solver = df.loc[ (df["solver_name"]==solver) & (df["idx_rep"]==rep)]
+            df_ret[solver].append(df_rep_solver['objective_value'].iloc[-1])  # last row of this (solver, repetition) pair
+    return pd.DataFrame(df_ret)