
Commit

fixed bug with dictionary as default parameter
AngelG14 committed Sep 18, 2024
1 parent 1f1bf3c commit 6d8e5eb
Showing 3 changed files with 51 additions and 27 deletions.
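For context: the bug named in the commit message is Python's classic mutable-default-argument pitfall. A `{}` default is evaluated once, when the function is defined, so every call that does not pass its own dict receives (and may mutate) the same shared object. The snippet below is a minimal, self-contained illustration of the problem and of the `None`-sentinel pattern that the changed files adopt; the function and key names are purely illustrative and do not come from the repository.

```python
# Pitfall: a dict default is created once and shared across all calls.
def configure_bad(name, options={}):
    options.setdefault("n_splits", 5)   # mutates the shared default dict
    return options

first = configure_bad("a")
second = configure_bad("b")
print(first is second)   # True: both calls returned the very same dict object

# Fix mirrored by this commit: default to None and create a fresh dict per call.
def configure_good(name, options=None):
    if options is None:
        options = {}                    # new, independent dict on every call
    options.setdefault("n_splits", 5)
    return options

print(configure_good("a") is configure_good("b"))   # False: independent dicts
```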
17 changes: 11 additions & 6 deletions python/naiveautoml/commons.py
@@ -24,7 +24,7 @@ def __init__(self,
logger_name=None,
use_caching=True,
error_treatment="info",
kwargs_evaluation_fun={},
kwargs_evaluation_fun=None,
random_state=None
):

@@ -83,11 +83,10 @@ def get_evaluation_fun(self, evaluation_fun, kwargs_evaluation_fun):
self.logger.info("Choosing mccv as default evaluation function.")
evaluation_fun = "mccv"

if evaluation_fun in ["lccv", "mccv"]:
if evaluation_fun in ["kfold", "mccv"]:
is_small_dataset = task.X.shape[0] < 2000
is_medium_dataset = not is_small_dataset and task.X.shape[0] < 20000
is_large_dataset = not (is_small_dataset or is_medium_dataset)

if not kwargs_evaluation_fun:
if is_small_dataset:
self.logger.info("This is a small dataset, choosing 5 splits for evaluation")
@@ -103,12 +102,18 @@ def get_evaluation_fun(self, evaluation_fun, kwargs_evaluation_fun):
"Invalid case for dataset size!! This should never happen. Please report this as a bug.")

if evaluation_fun == "mccv":
return MccvEvaluator(task.inferred_task_type, random_state=self.random_state, **kwargs_evaluation_fun)
return MccvEvaluator(task_type=task.inferred_task_type,
random_state=self.random_state,
kwargs_evaluation_fun=kwargs_evaluation_fun)
elif evaluation_fun == "kfold":
return KFoldEvaluator(task.inferred_task_type, random_state=self.random_state, **kwargs_evaluation_fun)
return KFoldEvaluator(task_type=task.inferred_task_type,
random_state=self.random_state,
kwargs_evaluation_fun=kwargs_evaluation_fun)

elif evaluation_fun == "lccv":
return LccvEvaluator(task.inferred_task_type, random_state=self.random_state, **kwargs_evaluation_fun)
return LccvEvaluator(task_type=task.inferred_task_type,
random_state=self.random_state,
kwargs_evaluation_fun=kwargs_evaluation_fun)
else:
return evaluation_fun

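With the hunk above, `get_evaluation_fun` hands the kwargs to the evaluator constructors as a single named argument instead of unpacking them with `**`, so an empty dict simply triggers the dataset-size-dependent choice of splits. The sketch below mirrors the new `MccvEvaluator` call; it assumes the package and its sklearn dependency are installed, and the `n_splits` key is an assumption (the dict is ultimately forwarded to the underlying splitter, so its keys must match that splitter's parameters).

```python
from naiveautoml.evaluators import MccvEvaluator

# Mirrors the reworked call in get_evaluation_fun: the kwargs travel as one
# keyword argument, so no shared default dict can leak between instances.
evaluator = MccvEvaluator(
    task_type="classification",              # task.inferred_task_type in the real call
    random_state=42,                          # self.random_state in the real call
    kwargs_evaluation_fun={"n_splits": 10},   # assumed key; forwarded to the splitter
)
```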
44 changes: 27 additions & 17 deletions python/naiveautoml/evaluators.py
@@ -11,18 +11,21 @@ class LccvEvaluator:

def __init__(self,
task_type,
train_size=0.8,
logger_name="naml.evaluator",
repetitions_per_anchor=5,
random_state=None):
random_state=None,
kwargs_evaluation_fun=None):

self.kwargs_lccv = kwargs_evaluation_fun
self.task_type = task_type
self.r = -np.inf
self.train_size = train_size
self.repetitions_per_anchor = repetitions_per_anchor
self.random_state = random_state
self.logger = logging.getLogger(logger_name)

if "target_anchor" not in self.kwargs_lccv:
self.kwargs_lccv["target_anchor"] = 0.8
if "max_evaluations" not in self.kwargs_lccv:
self.kwargs_lccv["max_evaluations"] = 5

def __call__(self, pl, X, y, scorings, error_treatment="raise"):
warnings.filterwarnings('ignore', module='sklearn')
warnings.filterwarnings('ignore', module='numpy')
@@ -38,9 +41,8 @@ def __call__(self, pl, X, y, scorings, error_treatment="raise"):
r=self.r,
base_scoring=scorings[0]["name"],
additional_scorings=[s["name"] for s in scorings[1:]],
target_anchor=self.train_size,
max_evaluations=self.repetitions_per_anchor,
seed=self.random_state
seed=self.random_state,
**self.kwargs_lccv
)
if not np.isnan(score) and score > self.r:
self.r = score
@@ -200,17 +202,21 @@ def evaluate_split(self, pl, X, y, train_index, test_index, scorings, error_trea

class KFoldEvaluator(SplitBasedEvaluator):

def __init__(self, task_type, n_splits, random_state=None, logger_name="naml.evaluator"):
def __init__(self,
task_type,
random_state=None,
logger_name="naml.evaluator",
kwargs_evaluation_fun=None):

# define splitter
if task_type in ["classification"]:
splitter = sklearn.model_selection.StratifiedKFold(
n_splits=n_splits,
random_state=random_state,
shuffle=True
shuffle=True,
**kwargs_evaluation_fun
)
elif task_type in ["regression", "multilabel-indicator"]:
splitter = sklearn.model_selection.KFold(n_splits=n_splits, random_state=random_state, shuffle=True)
splitter = sklearn.model_selection.KFold(random_state=random_state, shuffle=True, **kwargs_evaluation_fun)
else:
raise ValueError(f"Unsupported task type {task_type}")

@@ -219,19 +225,23 @@ def __init__(self, task_type, n_splits, random_state=None, logger_name="naml.eva

class MccvEvaluator(SplitBasedEvaluator):

def __init__(self, task_type, n_splits, random_state=None, logger_name="naml.evaluator"):
def __init__(self,
task_type,
random_state=None,
logger_name="naml.evaluator",
kwargs_evaluation_fun=None):

if task_type in ["classification"]:
splitter = sklearn.model_selection.StratifiedShuffleSplit(
n_splits=n_splits,
train_size=0.8,
random_state=random_state
random_state=random_state,
**kwargs_evaluation_fun
)
elif task_type in ["regression", "multilabel-indicator"]:
splitter = sklearn.model_selection.ShuffleSplit(
n_splits=n_splits,
train_size=0.8,
random_state=random_state
random_state=random_state,
**kwargs_evaluation_fun
)
else:
raise ValueError(f"Unsupported task type {task_type}")
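Note that `n_splits` is no longer an explicit constructor parameter of `KFoldEvaluator` and `MccvEvaluator`; it now has to arrive inside `kwargs_evaluation_fun`, which is unpacked directly into the sklearn splitter. `LccvEvaluator` keeps the dict instead and falls back to `target_anchor=0.8` and `max_evaluations=5` when those keys are absent. A hedged usage sketch follows (it assumes the package's optional lccv dependency is importable, and the `n_splits` key must match the sklearn splitter's parameter name):

```python
from naiveautoml.evaluators import KFoldEvaluator, LccvEvaluator

# k-fold: the split count now rides inside the dict and reaches StratifiedKFold via **.
kfold = KFoldEvaluator(
    task_type="classification",
    random_state=0,
    kwargs_evaluation_fun={"n_splits": 5},
)

# lccv: missing keys are filled with the defaults shown in the diff above.
lccv = LccvEvaluator(
    task_type="classification",
    kwargs_evaluation_fun={"target_anchor": 0.9},   # max_evaluations falls back to 5
)
print(lccv.kwargs_lccv)   # {'target_anchor': 0.9, 'max_evaluations': 5}
```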
17 changes: 13 additions & 4 deletions python/naiveautoml/naiveautoml.py
@@ -29,9 +29,9 @@ def __init__(self,
max_hpo_iterations=100,
max_hpo_iterations_without_imp=100,
max_hpo_time_without_imp=1800,
kwargs_as={},
kwargs_hpo={},
kwargs_evaluation_fun={},
kwargs_as=None,
kwargs_hpo=None,
kwargs_evaluation_fun=None,
logger_name=None,
random_state: int = None,
strictly_naive: bool = False,
@@ -59,6 +59,12 @@ def __init__(self,
self.logger_name = logger_name
self.logger = logging.getLogger('naiveautoml' if logger_name is None else logger_name)

if kwargs_as is None:
kwargs_as = {}

if kwargs_hpo is None:
kwargs_hpo = {}

# configure algorithm selector
if isinstance(algorithm_selector, str):
accepted_selectors = ["sklearn"]
@@ -100,7 +106,10 @@

# configure evaluation function
self.evaluation_fun = evaluation_fun
self.kwargs_evaluation_fun = kwargs_evaluation_fun
if kwargs_evaluation_fun is None:
self.kwargs_evaluation_fun = {}
else:
self.kwargs_evaluation_fun = kwargs_evaluation_fun

# memorize scorings
self.scoring = None
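At the user-facing level, the fix means `kwargs_as`, `kwargs_hpo`, and `kwargs_evaluation_fun` can be omitted, passed as `None`, or given as plain dicts without two `NaiveAutoML` instances ever sharing a mutable default. A hedged end-to-end sketch (the scikit-learn-style `fit` call and the `n_splits` key are assumptions consistent with the library's usage, not part of this diff):

```python
from sklearn.datasets import load_iris
from naiveautoml import NaiveAutoML

X, y = load_iris(return_X_y=True)

# Each instance now normalizes a missing kwargs_evaluation_fun to its own fresh
# dict, so the two objects below cannot mutate a shared default.
naml_default = NaiveAutoML(max_hpo_iterations=5, random_state=0)
naml_kfold = NaiveAutoML(
    evaluation_fun="kfold",
    kwargs_evaluation_fun={"n_splits": 3},   # assumed key; forwarded to the splitter
    max_hpo_iterations=5,
    random_state=0,
)

naml_kfold.fit(X, y)   # scikit-learn-style interface; assumed, not shown in this diff
```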
