Skip to content

Commit

Permalink
fixed issues with configspace (#70)
Browse files Browse the repository at this point in the history
* fixed issues with configspace

* solved hyperparameter issues

* solved flake8 issue

* added TurboEvaluator to speed up search space unit tests

* changed scoring argument

The semantics are now that either a string or a (name, scoring) tuple is expected.

* fixed flake8 issues

---------

Co-authored-by: felix <felix@frank>
  • Loading branch information
fmohr and felix authored Oct 7, 2024
1 parent 3810b00 commit 08fe0cb
Show file tree
Hide file tree
Showing 8 changed files with 1,580 additions and 1,733 deletions.
14 changes: 11 additions & 3 deletions python/naiveautoml/_interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
from scipy.sparse import issparse, spmatrix
from ConfigSpace import ConfigurationSpace
from sklearn.metrics import get_scorer, make_scorer
from sklearn.metrics import get_scorer
import pandas as pd
import time
from tqdm import tqdm
Expand Down Expand Up @@ -43,16 +43,24 @@ def __init__(self,

# configure scorings
def prepare_scoring(scoring):

is_str = isinstance(scoring, str)
is_tuple = isinstance(scoring, tuple)
if not is_str and not is_tuple:
raise ValueError(f"scoring must be either str or tuple but is {type(scoring)}")
if is_tuple and len(scoring) != 2:
raise ValueError("if scoring is a tuple, it must contain 2 elements, a name and the scoring function")

out = {
"name": scoring if isinstance(scoring, str) else scoring["name"]
"name": scoring if isinstance(scoring, str) else scoring[0]
}
if type_of_target(self._y) == "multilabel-indicator":
out["fun"] = None
else:
if isinstance(scoring, str):
out["fun"] = get_scorer(scoring)
else:
out["fun"] = make_scorer(**{key: val for key, val in scoring.items() if key != "name"})
out["fun"] = scoring[1]
return out

if scoring is None:
Expand Down
2 changes: 1 addition & 1 deletion python/naiveautoml/algorithm_selection/_sklearn_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ def score_func(X, y):
return sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis(**params)

if clazz == sklearn.linear_model.LogisticRegression:
params["dual"] = check_for_bool(params["dual"])
# params["dual"] = check_for_bool(params["dual"]) -- disabled now
return sklearn.linear_model.LogisticRegression(**params)

if clazz == sklearn.neural_network.MLPClassifier:
Expand Down
7 changes: 5 additions & 2 deletions python/naiveautoml/naiveautoml.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,11 @@ def fit(self, X, y, categorical_features=None):

# get candidate descriptor
as_result_for_best_candidate = relevant_history.sort_values(self.task.scoring["name"]).iloc[-1]
config_space = self.algorithm_selector.get_config_space(as_result_for_best_candidate)

if (
if len(config_space) == 0:
self.logger.info(f"The selected algorithms {as_result_for_best_candidate} have no hyperparameters.")
elif (
deadline is None or
deadline is not None and deadline - time.time() >= as_result_for_best_candidate["runtime"] + 5
):
Expand All @@ -244,7 +247,7 @@ def fit(self, X, y, categorical_features=None):
self.hp_optimizer.reset(
task=self.task,
runtime_of_default_config=as_result_for_best_candidate["runtime"],
config_space=self.algorithm_selector.get_config_space(as_result_for_best_candidate),
config_space=config_space,
history_descriptor_creation_fun=lambda hp_config: self.algorithm_selector.create_history_descriptor(
as_result_for_best_candidate,
hp_config
Expand Down
Loading

0 comments on commit 08fe0cb

Please sign in to comment.