Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEAT] Benchopt compatibility tools #2

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
15 changes: 15 additions & 0 deletions adastop/benchopt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pandas as pd


def process_benchopt(file):
    """
    Build a table of final objective values from a benchopt result file.

    For now, suppose that there is only one dataset.

    Parameters
    ----------
    file : path or file-like object accepted by ``pandas.read_parquet``, or
        an already-loaded ``pandas.DataFrame``. It must provide the columns
        ``solver_name``, ``objective_value`` and ``idx_rep``.

    Returns
    -------
    pandas.DataFrame
        One column per solver and one row per repetition, both in order of
        first appearance in the input. Each cell is the ``objective_value``
        of the last row recorded for that (solver, repetition) pair.

    Raises
    ------
    IndexError
        If a solver has no row for one of the repetitions.
    """
    raw = file if isinstance(file, pd.DataFrame) else pd.read_parquet(file)
    df = raw[["solver_name", "objective_value", "idx_rep"]]

    solvers = df["solver_name"].unique()
    reps = df["idx_rep"].unique()

    # Single pass over the data: remember the last recorded value of every
    # (solver, repetition) run. Row order is preserved inside each group.
    runs = df.groupby(["solver_name", "idx_rep"], sort=False)["objective_value"]
    last_value = {key: values.iloc[-1] for key, values in runs}

    columns = {}
    for solver in solvers:
        scores = []
        for rep in reps:
            if (solver, rep) not in last_value:
                raise IndexError(
                    "solver " + repr(solver) + " has no result for repetition "
                    + repr(rep)
                )
            scores.append(last_value[(solver, rep)])
        columns[solver] = scores
    return pd.DataFrame(columns)
140 changes: 113 additions & 27 deletions adastop/cli.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,25 @@
import click
import pickle
import yaml
import os
from pathlib import Path
import subprocess
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from .benchopt import process_benchopt
from .compare_agents import MultipleAgentsComparator

LITTER_FILE = ".adastop_comparator.pkl"

@click.group()
@click.pass_context
def adastop(ctx):
"""
Program to perform adaptive stopping algorithm using csv file intput_file.
LITTER_FILE = ".adastop_comparator.pkl"

Use adastop sub-command --help to have help for a specific sub-command
"""
pass

@adastop.command()
@click.option("--n-groups", default=5, show_default=True, help="Number of groups.")
@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.")
@click.option("--alpha", default=0.05, show_default=True, help="Type I error.")
@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.")
@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.")
@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.")
@click.argument('input_file',required = True, type=str)
@click.pass_context
def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
def compare_data(path_lf, df, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
"""
Perform one step of adaptive stopping algorithm using csv file intput_file.
Perform one step of adaptive stopping algorithm using the dataframe df.
At first call, the comparator will be initialized with the arguments passed and then it will be saved to a save file in `.adastop_comparator.pkl`.
"""
path_lf = Path(input_file).parent.absolute() / LITTER_FILE
df = pd.read_csv(input_file, index_col=0)

n_fits_per_group = len(df)
n_agents = len(df.columns)
if compare_to_first:
Expand All @@ -48,7 +32,13 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar
with open(path_lf, 'rb') as fp:
comparator = pickle.load(fp)

Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in df.columns]
names = []
for i in range(len(comparator.agent_names)):
if i in comparator.current_comparisons.ravel():
names.append(comparator.agent_names[i])


Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in names]
if len(Z[0]) > comparator.K * n_fits_per_group:
raise ValueError('Error: you tried to use more group than what was initially declared, this is not allowed by the theory.')
assert "continue" in list(comparator.decisions.values()), "Test finished at last iteration."
Expand All @@ -57,13 +47,15 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar
comparator = MultipleAgentsComparator(n_fits_per_group, n_groups,
n_permutations, comparisons,
alpha, beta, seed)
Z = [df[agent].values for agent in df.columns]
names = df.columns

Z = [df[agent].values for agent in names]

data = {df.columns[i] : Z[i] for i in range(len(df.columns))}
data = {names[i] : Z[i] for i in range(len(names))}
# recover also the data of agent that were decided.
if comparator.agent_names is not None:
for agent in comparator.agent_names:
if agent not in df.columns:
if agent not in data.keys():
data[agent]=comparator.eval_values[agent]

comparator.partial_compare(data, False)
Expand All @@ -86,6 +78,100 @@ def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compar
pickle.dump(comparator, fp)
click.echo("Comparator Saved")


@click.group()
@click.pass_context
def adastop(ctx):
    """
    Program to perform adaptive stopping algorithm using csv file input_file.

    Use adastop sub-command --help to have help for a specific sub-command
    """
    # Root command group: it only exists so that sub-commands can be attached
    # with @adastop.command(). The docstring above is what click prints for
    # `adastop --help`, so it is user-facing text.

@adastop.command()
@click.option("--n-groups", default=5, show_default=True, help="Number of groups.")
@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.")
@click.option("--alpha", default=0.05, show_default=True, help="Type I error.")
@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.")
@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.")
@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.")
@click.argument('input_file',required = True, type=str)
@click.pass_context
def compare(ctx, input_file, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
    """
    Perform one step of adaptive stopping algorithm using csv file input_file.
    At first call, the comparator will be initialized with the arguments passed and then it will be saved to a save file in `.adastop_comparator.pkl`.
    """
    # NOTE: the docstring above is rendered by click as the command help, so
    # implementation notes live in comments instead.

    # The comparator state file is kept next to the csv file so that
    # successive calls on files of the same folder share one comparator.
    path_lf = Path(input_file).parent.absolute() / LITTER_FILE

    # First csv column is used as the row index; remaining columns are
    # handed to compare_data as one column per agent.
    df = pd.read_csv(input_file, index_col=0)

    compare_data(path_lf, df, n_groups, n_permutations, alpha, beta, seed, compare_to_first)


@adastop.command()
@click.option("--n-groups", default=5, show_default=True, help="Number of groups.")
@click.option("--n-permutations", default=10000, show_default=True, help="Number of random permutations.")
@click.option("--alpha", default=0.05, show_default=True, help="Type I error.")
@click.option("--beta", default=0.0, show_default=True, help="early accept parameter.")
@click.option("--seed", default=None, type=int, show_default=True, help="Random seed.")
@click.option("--compare-to-first", is_flag=True, show_default=True, default=False, help="Compare all algorithms to the first algorithm.")
@click.option("--size-group", default=6, show_default=True, help="Number of benchmark repetitions in each group (passed to benchopt as -r).")
@click.argument('config_file',required = True, type=str)
@click.pass_context
def compare_benchopt(ctx, config_file, size_group, n_groups, n_permutations, alpha, beta, seed, compare_to_first):
    """
    Perform one step of computing benchmark and then adaptive stopping algorithm.
    The benchmark is supposed to be in the current directory.

    WARNING: still experimental.
    """
    # NOTE: the docstring above is rendered by click as the command help, so
    # implementation notes live in comments instead.
    import tempfile

    path_lf = Path(config_file).parent.absolute() / LITTER_FILE

    # Resume from a saved comparator when there is one; otherwise this is the
    # very first group.
    comparator = None
    k = 0
    if os.path.isfile(path_lf):
        # NOTE(security): pickle.load can execute arbitrary code. Only run
        # this command in folders whose comparator file you created yourself.
        with open(path_lf, 'rb') as fp:
            comparator = pickle.load(fp)
        # presumably the number of groups already processed — confirm against
        # MultipleAgentsComparator.
        k = comparator.k

    run_name = "adastop_result_file_" + str(k)
    csv_file = "outputs/" + run_name + ".csv"

    if os.path.isfile(csv_file):
        # The benchmark for this group was already run and converted: reuse it.
        df = pd.read_csv(csv_file, index_col=0)
    else:
        if k > 0:
            # Only solvers still involved in an open comparison are re-run.
            active = set(comparator.current_comparisons.ravel().tolist())
            undecided_solvers = [
                name
                for i, name in enumerate(comparator.agent_names)
                if i in active
            ]
            if not undecided_solvers:
                # Do not launch a benchmark when nothing is left to compare.
                raise click.ClickException(
                    "No undecided solver left: the test is already finished."
                )

            with open(config_file, 'r') as file:
                # An empty yaml file loads as None; treat it as an empty config.
                config = yaml.safe_load(file) or {}
            config['solver'] = undecided_solvers

            print("Doing comparisons for "+str(len(undecided_solvers))+ " solvers: "+", ".join(undecided_solvers))

            # Write the filtered config in a private temporary directory
            # instead of a fixed, shared /tmp path; it is removed afterwards.
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_config = os.path.join(tmp_dir, "config_benchopt.yml")
                with open(tmp_config, 'w') as file:
                    yaml.dump(config, file)
                subprocess.check_output(["benchopt", "run", ".", "--config", tmp_config,
                                         "--env", "-r", str(size_group),
                                         "--output", run_name])
        else:
            # initially, run everything
            subprocess.check_output(["benchopt", "run", ".", "--config",
                                     config_file, "--env", "-r", str(size_group),
                                     "--output", run_name])

        # Convert the benchopt output to the csv layout used by adastop and
        # cache it so that a re-run of this step does not redo the benchmark.
        df = process_benchopt("outputs/" + run_name + ".parquet")
        df.to_csv(csv_file)

    compare_data(path_lf, df, n_groups, n_permutations, alpha, beta, seed, compare_to_first)


@adastop.command()
@click.argument('folder',required = True, type=str)
@click.pass_context
Expand Down
3 changes: 2 additions & 1 deletion adastop/compare_agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,8 @@ def partial_compare(self, eval_values, verbose=True):
admissible_values_sup = values[
self.level_spent + icumulative_probas <= clevel
]
if len(np.unique(values)) < 1/clevel:
print("WARNING: too many values are equal, or size of group too small, the test may not be precise.")

if len(admissible_values_sup) > 0:
bk_sup = admissible_values_sup[0] # the minimum admissible value
Expand Down Expand Up @@ -324,7 +326,6 @@ def partial_compare(self, eval_values, verbose=True):
- np.mean(Z[comp[1]][: ((k + 1) * self.n[comp[1]])]
)
)

if Tmax > bk_sup:
id_reject = np.arange(len(current_decisions))[current_decisions== "continue"][imax]
current_decisions[id_reject] = "reject"
Expand Down
15 changes: 15 additions & 0 deletions adastop/data_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import pandas as pd


def process_benchopt(file):
    """
    Build a table of final objective values from a benchopt result file.

    For now, suppose that there is only one dataset.

    Parameters
    ----------
    file : path or file-like object accepted by ``pandas.read_parquet``, or
        an already-loaded ``pandas.DataFrame``. It must provide the columns
        ``solver_name``, ``objective_value`` and ``idx_rep``.

    Returns
    -------
    pandas.DataFrame
        One column per solver and one row per repetition, both in order of
        first appearance in the input. Each cell is the ``objective_value``
        of the last row recorded for that (solver, repetition) pair.

    Raises
    ------
    IndexError
        If a solver has no row for one of the repetitions.
    """
    raw = file if isinstance(file, pd.DataFrame) else pd.read_parquet(file)
    df = raw[["solver_name", "objective_value", "idx_rep"]]

    solvers = df["solver_name"].unique()
    reps = df["idx_rep"].unique()

    # Single pass over the data: remember the last recorded value of every
    # (solver, repetition) run. Row order is preserved inside each group.
    runs = df.groupby(["solver_name", "idx_rep"], sort=False)["objective_value"]
    last_value = {key: values.iloc[-1] for key, values in runs}

    columns = {}
    for solver in solvers:
        scores = []
        for rep in reps:
            if (solver, rep) not in last_value:
                raise IndexError(
                    "solver " + repr(solver) + " has no result for repetition "
                    + repr(rep)
                )
            scores.append(last_value[(solver, rep)])
        columns[solver] = scores
    return pd.DataFrame(columns)
Loading