From a60d14b452abf5943e764e6bbc2e6d103ff67108 Mon Sep 17 00:00:00 2001
From: Juan Rocha
Date: Mon, 7 Oct 2024 16:31:58 -0500
Subject: [PATCH] feat: accept scorers built with make_scorer as custom scoring

---
 python/naiveautoml/_interfaces.py | 12 +++++++++++-
 python/test/test_naiveautoml.py   | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 2 deletions(-)

diff --git a/python/naiveautoml/_interfaces.py b/python/naiveautoml/_interfaces.py
index a0c71ad..8058f46 100644
--- a/python/naiveautoml/_interfaces.py
+++ b/python/naiveautoml/_interfaces.py
@@ -10,6 +10,7 @@
 from tqdm import tqdm
 
 from sklearn.utils.multiclass import type_of_target
+from sklearn.metrics._scorer import _BaseScorer
 
 
 class SupervisedTask:
@@ -43,14 +44,23 @@ def __init__(self,
         # configure scorings
         def prepare_scoring(scoring):
+            # resolve the scoring's display name (plain string, make_scorer scorer, or dict)
+            if isinstance(scoring, str):
+                name = scoring
+            elif isinstance(scoring, _BaseScorer):
+                name = scoring._score_func.__name__
+            else:
+                name = scoring["name"]
             out = {
-                "name": scoring if isinstance(scoring, str) else scoring["name"]
+                "name": name
             }
             if type_of_target(self._y) == "multilabel-indicator":
                 out["fun"] = None
             else:
                 if isinstance(scoring, str):
                     out["fun"] = get_scorer(scoring)
+                elif isinstance(scoring, _BaseScorer):
+                    out["fun"] = scoring
                 else:
                     out["fun"] = make_scorer(**{key: val for key, val in scoring.items() if key != "name"})
             return out
 
diff --git a/python/test/test_naiveautoml.py b/python/test/test_naiveautoml.py
index 50306e6..c21f2c6 100644
--- a/python/test/test_naiveautoml.py
+++ b/python/test/test_naiveautoml.py
@@ -1,7 +1,7 @@
 import logging
 
 import pytest
-from sklearn.metrics import get_scorer
+from sklearn.metrics import get_scorer, make_scorer
 
 import naiveautoml
 import numpy as np
@@ -126,6 +126,35 @@ def test_acceptance_of_dataframe(self, openmlid):
         )
         naml.fit(X, y)
 
+    @parameterized.expand([
+        (61, True),
+        (61, False),
+    ])
+    def test_acceptance_of_custom_scoring(self, openmlid, use_make_scorer):
+        # custom scorer that returns a random score; it only needs to be accepted, not to be meaningful
+        scoring = {
+            "name": "custom_scorer",
+            "score_func": lambda y, y_pred: np.random.rand(),
+            "greater_is_better": True,
+            "needs_proba": True,
+            "needs_threshold": False,
+        }
+        if use_make_scorer:
+            # make_scorer has no "name" parameter, so strip it before building the scorer
+            scoring = make_scorer(**{k: v for k, v in scoring.items() if k != "name"})
+
+        self.logger.info(f"Testing acceptance of custom scoring on dataset {openmlid} (use_make_scorer={use_make_scorer})")
+        X, y = get_dataset(openmlid, as_numpy=False)
+        naml = naiveautoml.NaiveAutoML(
+            logger_name="naml",
+            timeout_overall=15,
+            max_hpo_iterations=1,
+            show_progress=True,
+            raise_errors=True,
+            scoring=scoring
+        )
+        naml.fit(X, y)
+
     @parameterized.expand([
         (188,),  # eucalyptus. Very important because has both missing values and categorical attributes
     ])
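
Notes: with this change, the "scoring" argument can be given in any of three
forms. The sketch below is illustrative rather than part of the diff;
accuracy_score and the metric name "my_accuracy" are stand-ins, and the
constructor call mirrors the tests above.

    from sklearn.metrics import accuracy_score, make_scorer

    import naiveautoml

    # 1) the name of a scorer registered in sklearn
    naml = naiveautoml.NaiveAutoML(scoring="accuracy")

    # 2) a scorer object built with make_scorer (newly supported by this patch)
    naml = naiveautoml.NaiveAutoML(scoring=make_scorer(accuracy_score))

    # 3) a dict: "name" is used for reporting only and is stripped before the
    #    remaining keys are forwarded to sklearn's make_scorer
    naml = naiveautoml.NaiveAutoML(scoring={
        "name": "my_accuracy",
        "score_func": accuracy_score,
        "greater_is_better": True,
    })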