def process_benchopt(file):
    """
    Build the adastop input table from benchopt results.

    For now, suppose that there is only one dataset.

    Parameters
    ----------
    file : str, path-like or pandas.DataFrame
        Path to a benchopt ``.parquet`` result file, or the results already
        loaded as a DataFrame. The columns ``solver_name``,
        ``objective_value`` and ``idx_rep`` are required.

    Returns
    -------
    pandas.DataFrame
        One column per solver and one row per repetition, both in order of
        first appearance. Each cell holds the ``objective_value`` of the last
        row (in file order) for that solver and repetition.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    IndexError
        If a solver has no row for one of the repetitions.
    """
    df = file if isinstance(file, pd.DataFrame) else pd.read_parquet(file)
    df = df[["solver_name", "objective_value", "idx_rep"]]

    # Computed once: the original recomputed unique() inside the loops.
    solvers = df["solver_name"].unique()
    repetitions = df["idx_rep"].unique()

    df_ret = {}
    for solver in solvers:
        solver_rows = df[df["solver_name"] == solver]
        # The column read here must be one kept by the selection above
        # ('objective_value'); any other name raises KeyError.
        df_ret[solver] = [
            solver_rows.loc[
                solver_rows["idx_rep"] == rep, "objective_value"
            ].iloc[-1]
            for rep in repetitions
        ]
    return pd.DataFrame(df_ret)
+LITTER_FILE = ".adastop_comparator.pkl" - Use adastop sub-command --help to have help for a specific sub-command - """ - pass -@adastop.command() -@click.option("--n-groups", default=5, show_default=True, help="Number of groups.") -@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.") -@click.option("--alpha", default=0.05, show_default=True, help="Type I error.") -@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.") -@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.") -@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.") -@click.argument('input_file',required = True, type=str) -@click.pass_context -def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compare_to_first): +def compare_data(path_lf, df, n_groups, n_permutations, alpha, beta, seed, compare_to_first): """ - Perform one step of adaptive stopping algorithm using csv file intput_file. + Perform one step of adaptive stopping algorithm using the dataframe df. At first call, the comparator will be initialized with the arguments passed and then it will be saved to a save file in `.adastop_comparator.pkl`. 
""" - path_lf = Path(input_file).parent.absolute() / LITTER_FILE - df = pd.read_csv(input_file, index_col=0) + n_fits_per_group = len(df) n_agents = len(df.columns) if compare_to_first: @@ -48,7 +32,13 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar with open(path_lf, 'rb') as fp: comparator = pickle.load(fp) - Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in df.columns] + names = [] + for i in range(len(comparator.agent_names)): + if i in comparator.current_comparisons.ravel(): + names.append(comparator.agent_names[i]) + + + Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in names] if len(Z[0]) > comparator.K * n_fits_per_group: raise ValueError('Error: you tried to use more group than what was initially declared, this is not allowed by the theory.') assert "continue" in list(comparator.decisions.values()), "Test finished at last iteration." @@ -57,13 +47,15 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar comparator = MultipleAgentsComparator(n_fits_per_group, n_groups, n_permutations, comparisons, alpha, beta, seed) - Z = [df[agent].values for agent in df.columns] + names = df.columns + + Z = [df[agent].values for agent in names] - data = {df.columns[i] : Z[i] for i in range(len(df.columns))} + data = {names[i] : Z[i] for i in range(len(names))} # recover also the data of agent that were decided. if comparator.agent_names is not None: for agent in comparator.agent_names: - if agent not in df.columns: + if agent not in data.keys(): data[agent]=comparator.eval_values[agent] comparator.partial_compare(data, False) @@ -86,6 +78,100 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar pickle.dump(comparator, fp) click.echo("Comparator Saved") + +@click.group() +@click.pass_context +def adastop(ctx): + """ + Program to perform adaptive stopping algorithm using csv file intput_file. 
+ + Use adastop sub-command --help to have help for a specific sub-command + """ + pass + +@adastop.command() +@click.option("--n-groups", default=5, show_default=True, help="Number of groups.") +@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.") +@click.option("--alpha", default=0.05, show_default=True, help="Type I error.") +@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.") +@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.") +@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.") +@click.argument('input_file',required = True, type=str) +@click.pass_context +def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compare_to_first): + """ + Perform one step of adaptive stopping algorithm using csv file intput_file. + At first call, the comparator will be initialized with the arguments passed and then it will be saved to a save file in `.adastop_comparator.pkl`. 
@adastop.command()
@click.option("--n-groups", default=5, show_default=True, help="Number of groups.")
@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.")
@click.option("--alpha", default=0.05, show_default=True, help="Type I error.")
@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.")
@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.")
@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.")
@click.option("--size-group", default=6, show_default=True, help="Number of fits (benchopt repetitions) per group.")
@click.argument('config_file', required=True, type=str)
@click.pass_context
def compare_benchopt(ctx, config_file, size_group, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
    """
    Perform one step of computing benchmark and then adaptive stopping algorithm.
    The benchmark is supposed to be in the current directory.

    WARNING: still experimental.
    """
    # Function-scope import so this command does not rely on a file-level import.
    import tempfile

    path_lf = Path(config_file).parent.absolute() / LITTER_FILE

    # Recover the index of the current group from the saved comparator, if any.
    comparator = None
    k = 0
    if os.path.isfile(path_lf):
        # NOTE(review): pickle.load executes arbitrary code from the file; this
        # is only safe because the file is expected to be written by adastop
        # itself. Do not point this command at an untrusted directory.
        with open(path_lf, 'rb') as fp:
            comparator = pickle.load(fp)
        k = comparator.k

    run_name = "adastop_result_file_" + str(k)
    csv_path = "outputs/" + run_name + ".csv"
    parquet_path = "outputs/" + run_name + ".parquet"

    if os.path.isfile(csv_path):
        # The benchmark for this group was already run: reuse its results.
        df = pd.read_csv(csv_path, index_col=0)
    else:
        with tempfile.TemporaryDirectory() as tmp_dir:
            if k > 0:
                # Only solvers still involved in an undecided comparison are re-run.
                undecided_ids = set(comparator.current_comparisons.ravel().tolist())
                undecided_solvers = [
                    name
                    for i, name in enumerate(comparator.agent_names)
                    if i in undecided_ids
                ]

                with open(config_file, 'r') as file:
                    config = yaml.safe_load(file)
                config['solver'] = undecided_solvers

                # Write the reduced config to a private temporary directory
                # rather than a fixed /tmp path (portable, no collisions
                # between concurrent runs).
                run_config = os.path.join(tmp_dir, "config_benchopt.yml")
                with open(run_config, 'w') as file:
                    yaml.dump(config, file)

                click.echo("Doing comparisons for " + str(len(undecided_solvers))
                           + " solvers: " + ", ".join(undecided_solvers))
            else:
                # initially, run everything
                run_config = config_file

            subprocess.check_output(["benchopt", "run", ".", "--config", run_config,
                                     "--env", "-r", str(size_group),
                                     "--output", run_name])

        df = process_benchopt(parquet_path)
        # Cache the processed results so a re-run of this step skips the benchmark.
        df.to_csv(csv_path)

    compare_data(path_lf, df, n_groups, n_permutations, alpha, beta, seed, compare_to_first)
def process_benchopt(file):
    """
    Turn benchopt results into one column of final objective values per solver.

    For now, suppose that there is only one dataset.

    Parameters
    ----------
    file : str, path-like or pandas.DataFrame
        Path to a benchopt ``.parquet`` result file. An already loaded
        DataFrame is also accepted and used as is. It must contain the
        columns ``solver_name``, ``objective_value`` and ``idx_rep``.

    Returns
    -------
    pandas.DataFrame
        Columns are the solver names and rows are the repetitions, each in
        order of first appearance. A cell is the ``objective_value`` found in
        the last row (in file order) of that solver for that repetition.

    Raises
    ------
    KeyError
        If a required column is absent.
    IndexError
        If some solver has no row for one of the repetitions.
    """
    if isinstance(file, pd.DataFrame):
        results = file
    else:
        results = pd.read_parquet(file)
    results = results[["solver_name", "objective_value", "idx_rep"]]

    repetitions = results["idx_rep"].unique()
    final_values = {}
    for solver in results["solver_name"].unique():
        # Filter once per solver instead of rescanning the whole frame for
        # every (repetition, solver) pair.
        solver_rows = results[results["solver_name"] == solver]
        final_values[solver] = [
            solver_rows.loc[
                solver_rows["idx_rep"] == rep, "objective_value"
            ].iloc[-1]
            for rep in repetitions
        ]
    return pd.DataFrame(final_values)