From 6d8e5ebd9105b7fc732ac8acdb072b427f7648e6 Mon Sep 17 00:00:00 2001
From: Angel Gomez
Date: Wed, 18 Sep 2024 13:44:42 -0500
Subject: [PATCH] fixed bug with dictionary as default parameter

---
 python/naiveautoml/commons.py     | 17 +++++++-----
 python/naiveautoml/evaluators.py  | 44 +++++++++++++++++++------------
 python/naiveautoml/naiveautoml.py | 17 +++++++++---
 3 files changed, 51 insertions(+), 27 deletions(-)

diff --git a/python/naiveautoml/commons.py b/python/naiveautoml/commons.py
index 78fd4c1..3c20902 100644
--- a/python/naiveautoml/commons.py
+++ b/python/naiveautoml/commons.py
@@ -24,7 +24,7 @@ def __init__(self,
                  logger_name=None,
                  use_caching=True,
                  error_treatment="info",
-                 kwargs_evaluation_fun={},
+                 kwargs_evaluation_fun=None,
                  random_state=None
                  ):
 
@@ -83,11 +83,10 @@ def get_evaluation_fun(self, evaluation_fun, kwargs_evaluation_fun):
             self.logger.info("Choosing mccv as default evaluation function.")
             evaluation_fun = "mccv"
 
-        if evaluation_fun in ["lccv", "mccv"]:
+        if evaluation_fun in ["kfold", "mccv"]:
             is_small_dataset = task.X.shape[0] < 2000
             is_medium_dataset = not is_small_dataset and task.X.shape[0] < 20000
             is_large_dataset = not (is_small_dataset or is_medium_dataset)
-
             if not kwargs_evaluation_fun:
                 if is_small_dataset:
                     self.logger.info("This is a small dataset, choosing 5 splits for evaluation")
@@ -103,12 +102,18 @@ def get_evaluation_fun(self, evaluation_fun, kwargs_evaluation_fun):
                     "Invalid case for dataset size!! This should never happen. Please report this as a bug.")
 
         if evaluation_fun == "mccv":
-            return MccvEvaluator(task.inferred_task_type, random_state=self.random_state, **kwargs_evaluation_fun)
+            return MccvEvaluator(task_type=task.inferred_task_type,
+                                 random_state=self.random_state,
+                                 kwargs_evaluation_fun=kwargs_evaluation_fun)
         elif evaluation_fun == "kfold":
-            return KFoldEvaluator(task.inferred_task_type, random_state=self.random_state, **kwargs_evaluation_fun)
+            return KFoldEvaluator(task_type=task.inferred_task_type,
+                                  random_state=self.random_state,
+                                  kwargs_evaluation_fun=kwargs_evaluation_fun)
         elif evaluation_fun == "lccv":
-            return LccvEvaluator(task.inferred_task_type, random_state=self.random_state, **kwargs_evaluation_fun)
+            return LccvEvaluator(task_type=task.inferred_task_type,
+                                 random_state=self.random_state,
+                                 kwargs_evaluation_fun=kwargs_evaluation_fun)
         else:
             return evaluation_fun
 
diff --git a/python/naiveautoml/evaluators.py b/python/naiveautoml/evaluators.py
index 9e57095..7682598 100644
--- a/python/naiveautoml/evaluators.py
+++ b/python/naiveautoml/evaluators.py
@@ -11,18 +11,21 @@ class LccvEvaluator:
 
     def __init__(self,
                  task_type,
-                 train_size=0.8,
                  logger_name="naml.evaluator",
-                 repetitions_per_anchor=5,
-                 random_state=None):
+                 random_state=None,
+                 kwargs_evaluation_fun=None):
+        self.kwargs_lccv = kwargs_evaluation_fun
         self.task_type = task_type
         self.r = -np.inf
-        self.train_size = train_size
-        self.repetitions_per_anchor = repetitions_per_anchor
         self.random_state = random_state
         self.logger = logging.getLogger(logger_name)
 
+        if "target_anchor" not in self.kwargs_lccv:
+            self.kwargs_lccv["target_anchor"] = 0.8
+        if "max_evaluations" not in self.kwargs_lccv:
+            self.kwargs_lccv["max_evaluations"] = 5
+
     def __call__(self, pl, X, y, scorings, error_treatment="raise"):
         warnings.filterwarnings('ignore', module='sklearn')
         warnings.filterwarnings('ignore', module='numpy')
@@ -38,9 +41,8 @@ def __call__(self, pl, X, y, scorings, error_treatment="raise"):
                     r=self.r,
                     base_scoring=scorings[0]["name"],
                     additional_scorings=[s["name"] for s in scorings[1:]],
-                    target_anchor=self.train_size,
-                    max_evaluations=self.repetitions_per_anchor,
-                    seed=self.random_state
+                    seed=self.random_state,
+                    **self.kwargs_lccv
                 )
                 if not np.isnan(score) and score > self.r:
                     self.r = score
@@ -200,17 +202,21 @@ def evaluate_split(self, pl, X, y, train_index, test_index, scorings, error_trea
 
 
 class KFoldEvaluator(SplitBasedEvaluator):
 
-    def __init__(self, task_type, n_splits, random_state=None, logger_name="naml.evaluator"):
+    def __init__(self,
+                 task_type,
+                 random_state=None,
+                 logger_name="naml.evaluator",
+                 kwargs_evaluation_fun=None):
 
         # define splitter
         if task_type in ["classification"]:
             splitter = sklearn.model_selection.StratifiedKFold(
-                n_splits=n_splits, random_state=random_state,
-                shuffle=True
+                shuffle=True,
+                **kwargs_evaluation_fun
             )
         elif task_type in ["regression", "multilabel-indicator"]:
-            splitter = sklearn.model_selection.KFold(n_splits=n_splits, random_state=random_state, shuffle=True)
+            splitter = sklearn.model_selection.KFold(random_state=random_state, shuffle=True, **kwargs_evaluation_fun)
         else:
             raise ValueError(f"Unsupported task type {task_type}")
 
@@ -219,19 +225,23 @@ def __init__(self, task_type, n_splits, random_state=None, logger_name="naml.eva
 
 
 class MccvEvaluator(SplitBasedEvaluator):
 
-    def __init__(self, task_type, n_splits, random_state=None, logger_name="naml.evaluator"):
+    def __init__(self,
+                 task_type,
+                 random_state=None,
+                 logger_name="naml.evaluator",
+                 kwargs_evaluation_fun=None):
 
         if task_type in ["classification"]:
             splitter = sklearn.model_selection.StratifiedShuffleSplit(
-                n_splits=n_splits, train_size=0.8,
-                random_state=random_state
+                random_state=random_state,
+                **kwargs_evaluation_fun
             )
         elif task_type in ["regression", "multilabel-indicator"]:
             splitter = sklearn.model_selection.ShuffleSplit(
-                n_splits=n_splits, train_size=0.8,
-                random_state=random_state
+                random_state=random_state,
+                **kwargs_evaluation_fun
             )
         else:
             raise ValueError(f"Unsupported task type {task_type}")
 
diff --git a/python/naiveautoml/naiveautoml.py b/python/naiveautoml/naiveautoml.py
index c0a97e5..913f9bf 100644
--- a/python/naiveautoml/naiveautoml.py
+++ b/python/naiveautoml/naiveautoml.py
@@ -29,9 +29,9 @@ def __init__(self,
                  max_hpo_iterations=100,
                  max_hpo_iterations_without_imp=100,
                  max_hpo_time_without_imp=1800,
-                 kwargs_as={},
-                 kwargs_hpo={},
-                 kwargs_evaluation_fun={},
+                 kwargs_as=None,
+                 kwargs_hpo=None,
+                 kwargs_evaluation_fun=None,
                  logger_name=None,
                  random_state: int = None,
                  strictly_naive: bool = False,
@@ -59,6 +59,12 @@ def __init__(self,
         self.logger_name = logger_name
         self.logger = logging.getLogger('naiveautoml' if logger_name is None else logger_name)
 
+        if kwargs_as is None:
+            kwargs_as = {}
+
+        if kwargs_hpo is None:
+            kwargs_hpo = {}
+
         # configure algorithm selector
         if isinstance(algorithm_selector, str):
             accepted_selectors = ["sklearn"]
@@ -100,7 +106,10 @@ def __init__(self,
 
         # configure evaluation function
         self.evaluation_fun = evaluation_fun
-        self.kwargs_evaluation_fun = kwargs_evaluation_fun
+        if kwargs_evaluation_fun is None:
+            self.kwargs_evaluation_fun = {}
+        else:
+            self.kwargs_evaluation_fun = kwargs_evaluation_fun
 
         # memorize scorings
         self.scoring = None
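Background on the bug the patch addresses (not part of the patch itself): a default value such as kwargs_evaluation_fun={} is evaluated once, when the function is defined, so every call that falls back on the default shares a single dictionary, and any mutation of it (for example the self.kwargs_lccv["target_anchor"] = 0.8 writes introduced above) leaks across calls and instances. The commit switches to the None-sentinel idiom and creates a fresh dict inside the function. The sketch below illustrates the difference with made-up class names (BadEvaluator, GoodEvaluator); it is not code from the repository.

class BadEvaluator:
    """Anti-pattern: the default dict is built once and shared by all instances."""
    def __init__(self, kwargs_evaluation_fun={}):
        self.kwargs = kwargs_evaluation_fun
        self.kwargs.setdefault("n_splits", 5)   # mutates the shared default object


class GoodEvaluator:
    """Idiom used in the patch: None sentinel, fresh dict per call."""
    def __init__(self, kwargs_evaluation_fun=None):
        self.kwargs = {} if kwargs_evaluation_fun is None else kwargs_evaluation_fun
        self.kwargs.setdefault("n_splits", 5)    # only affects this instance's dict


print(BadEvaluator().kwargs is BadEvaluator().kwargs)    # True: both instances hold the same dict
print(GoodEvaluator().kwargs is GoodEvaluator().kwargs)  # False: each instance gets its own dict

The same guard appears in NaiveAutoML.__init__ above for kwargs_as, kwargs_hpo, and kwargs_evaluation_fun, and the evaluator classes now receive the already-materialized dict through an explicit kwargs_evaluation_fun parameter rather than via **kwargs unpacking.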