feat: accept make_scorer function as custom scoring

fmohr · Oct 7, 2024 · a60d14b
1 parent 6be518f
commit a60d14b
Show file tree

Hide file tree

Showing 2 changed files with 42 additions and 2 deletions.
diff --git a/python/naiveautoml/_interfaces.py b/python/naiveautoml/_interfaces.py
@@ -10,6 +10,7 @@
 from tqdm import tqdm
 
 from sklearn.utils.multiclass import type_of_target
+from sklearn.metrics._scorer import _BaseScorer
 
 
 class SupervisedTask:
@@ -43,14 +44,22 @@ def __init__(self,
 
         # configure scorings
         def prepare_scoring(scoring):
+            # Derive the display name of the scoring: a string is its own name,
+            # a scorer object is named after the function it wraps, and a dict
+            # carries an explicit "name" entry.
+            if isinstance(scoring, str):
+                name = scoring
+            elif isinstance(scoring, _BaseScorer):
+                # Not every callable has __name__ (e.g. functools.partial objects
+                # or callable instances); fall back to the callable's type name
+                # instead of raising AttributeError.
+                score_func = scoring._score_func
+                name = getattr(score_func, "__name__", type(score_func).__name__)
+            else:
+                name = scoring["name"]
             out = {
-                "name": scoring if isinstance(scoring, str) else scoring["name"]
+                "name": name
             }
             if type_of_target(self._y) == "multilabel-indicator":
                 out["fun"] = None
             else:
                 if isinstance(scoring, str):
                     out["fun"] = get_scorer(scoring)
+                elif isinstance(scoring, _BaseScorer):
+                    # Already a scorer object (e.g. built with make_scorer): use it as is.
+                    out["fun"] = scoring
                 else:
                     out["fun"] = make_scorer(**{key: val for key, val in scoring.items() if key != "name"})
             return out

diff --git a/python/test/test_naiveautoml.py b/python/test/test_naiveautoml.py
@@ -1,7 +1,7 @@
 import logging
 
 import pytest
-from sklearn.metrics import get_scorer
+from sklearn.metrics import get_scorer, make_scorer
 
 import naiveautoml
 import numpy as np
@@ -126,6 +126,37 @@ def test_acceptance_of_dataframe(self, openmlid):
         )
         naml.fit(X, y)
 
+    @parameterized.expand([
+        (61, True),
+        (61, False),
+    ])
+    def test_acceptance_of_custom_scoring(self, openmlid, use_make_scorer):
+        """Check that fit() accepts a custom scoring given as a scorer object or as a dict."""
+        # custom scorer with a random score as result, just to test if it is accepted
+        scorer_kwargs = {
+            "score_func": lambda y, y_pred: np.random.rand(),
+            "greater_is_better": True,
+            "needs_proba": True,
+            "needs_threshold": False,
+        }
+        if use_make_scorer:
+            # make_scorer forwards unknown keyword arguments to score_func, so the
+            # "name" entry must not be passed here; it only belongs to the dict form.
+            scoring = make_scorer(**scorer_kwargs)
+        else:
+            scoring = {"name": "custom_scorer", **scorer_kwargs}
+
+        self.logger.info("Testing acceptance of custom scoring")
+        X, y = get_dataset(openmlid, as_numpy=False)
+        naml = naiveautoml.NaiveAutoML(
+            logger_name="naml",
+            timeout_overall=15,
+            max_hpo_iterations=1,
+            show_progress=True,
+            raise_errors=True,
+            scoring=scoring
+        )
+        naml.fit(X, y)
+
     @parameterized.expand([
             (188,),  # eucalyptus. Very important because has both missing values and categorical attributes
         ])