diff --git a/examples/scripts/binary_classifier.py b/examples/scripts/binary_classifier.py index e4aff407..3eab9bdc 100644 --- a/examples/scripts/binary_classifier.py +++ b/examples/scripts/binary_classifier.py @@ -20,11 +20,12 @@ automl = AutoML( algorithms=["LightGBM"], - mode="Perform", + mode="Compete", explain_level=0, - train_ensemble=False, + train_ensemble=True, golden_features=False, features_selection=False, + eval_metric="auc" ) automl.fit(X_train, y_train) diff --git a/supervised/algorithms/catboost.py b/supervised/algorithms/catboost.py index cb4bdbd8..d02b0bac 100644 --- a/supervised/algorithms/catboost.py +++ b/supervised/algorithms/catboost.py @@ -159,7 +159,7 @@ def file_extension(self): "rsm": [0.7, 0.8, 0.9, 1], # random subspace method "subsample": [0.7, 0.8, 0.9, 1], # random subspace method "min_data_in_leaf": [1, 5, 10, 15, 20, 30, 50], - "loss_function": ["Logloss"] + "loss_function": ["Logloss"], } classification_default_params = { "depth": 6, "rsm": 0.9, "subsample": 1.0, - "min_data_in_leaf": 15, - "loss_function": "Logloss" + "min_data_in_leaf": 15, + "loss_function": "Logloss", } additional = { @@ -226,7 +226,7 @@ def file_extension(self): "rsm": 0.9, "subsample": 1.0, "min_data_in_leaf": 15, - "loss_function": "RMSE" + "loss_function": "RMSE", } AlgorithmsRegistry.add( diff --git a/supervised/base_automl.py b/supervised/base_automl.py index d4e436e8..e1b9acb8 100644 --- a/supervised/base_automl.py +++ b/supervised/base_automl.py @@ -1300,13 +1300,21 @@ def _validate_eval_metric(self): if isinstance(self.eval_metric, str) and self.eval_metric == "auto": return - if ( - self._get_ml_task() == BINARY_CLASSIFICATION - or self._get_ml_task() == MULTICLASS_CLASSIFICATION + if (self._get_ml_task() == BINARY_CLASSIFICATION) and self.eval_metric not in [ + "logloss", + "auc", + ]: + raise ValueError( + f"Metric {self.eval_metric} is not allowed in ML task: {self._get_ml_task()}. \ + Use 'logloss' or 'auc'" + ) + + elif ( + self._get_ml_task() == MULTICLASS_CLASSIFICATION ) and self.eval_metric != "logloss": raise ValueError( f"Metric {self.eval_metric} is not allowed in ML task: {self._get_ml_task()}. \ - Use 'log_loss'" + Use 'logloss'" ) elif self._get_ml_task() == REGRESSION and self.eval_metric != "rmse": diff --git a/supervised/utils/metric.py b/supervised/utils/metric.py index ebbff889..f4047099 100644 --- a/supervised/utils/metric.py +++ b/supervised/utils/metric.py @@ -30,6 +30,11 @@ def rmse(y_true, y_predicted): return np.sqrt(val) if val > 0 else -np.Inf +def negative_auc(y_true, y_predicted): + val = roc_auc_score(y_true, y_predicted) + return -1.0 * val + + class Metric(object): def __init__(self, params): if params is None: @@ -38,11 +43,18 @@ def __init__(self, params): self.name = self.params.get("name") if self.name is None: raise MetricException("Metric name not defined") - self.minimize_direction = self.name in ["logloss", "rmse", "mae", "ce", "mse"] + self.minimize_direction = self.name in [ + "logloss", + "rmse", + "mae", + "ce", + "mse", + "auc", + ] if self.name == "logloss": self.metric = logloss elif self.name == "auc": - self.metric = roc_auc_score + self.metric = negative_auc elif self.name == "acc": self.metric = accuracy_score elif self.name == "rmse":