From 08fe0cb1dbfe55ffab3c84d66d0d9a40b8229982 Mon Sep 17 00:00:00 2001 From: Felix Mohr Date: Tue, 8 Oct 2024 00:16:11 +0200 Subject: [PATCH] fixed issues with configspace (#70) * fixed issues with configspace * solved hyperparameter issues * solved flake8 issue * added TurboEvaluator to speed up search space unit tests * changed scoring argument: the semantics is now that a string or a (name, scoring) tuple is expected * fixed flake8 issues --------- Co-authored-by: felix --- python/naiveautoml/_interfaces.py | 14 +- .../algorithm_selection/_sklearn_factory.py | 2 +- python/naiveautoml/naiveautoml.py | 7 +- .../searchspace-classification.json | 204 +- .../naiveautoml/searchspace-regression.json | 194 +- python/setup.py | 8 +- python/test/test_naiveautoml.py | 161 +- python/usage-example.ipynb | 2723 ++++++++--------- 8 files changed, 1580 insertions(+), 1733 deletions(-) diff --git a/python/naiveautoml/_interfaces.py b/python/naiveautoml/_interfaces.py index a0c71ad..69e729e 100644 --- a/python/naiveautoml/_interfaces.py +++ b/python/naiveautoml/_interfaces.py @@ -4,7 +4,7 @@ import numpy as np from scipy.sparse import issparse, spmatrix from ConfigSpace import ConfigurationSpace -from sklearn.metrics import get_scorer, make_scorer +from sklearn.metrics import get_scorer import pandas as pd import time from tqdm import tqdm @@ -43,8 +43,16 @@ def __init__(self, # configure scorings def prepare_scoring(scoring): + + is_str = isinstance(scoring, str) + is_tuple = isinstance(scoring, tuple) + if not is_str and not is_tuple: + raise ValueError(f"scoring must be either str or tuple but is {type(scoring)}") + if is_tuple and len(scoring) != 2: + raise ValueError("if scoring is a tuple, it must contain 2 elements, a name and the scoring function") + out = { - "name": scoring if isinstance(scoring, str) else scoring["name"] + "name": scoring if isinstance(scoring, str) else scoring[0] } if type_of_target(self._y) == "multilabel-indicator": out["fun"] = None @@ -52,7 +60,7 @@
def prepare_scoring(scoring): if isinstance(scoring, str): out["fun"] = get_scorer(scoring) else: - out["fun"] = make_scorer(**{key: val for key, val in scoring.items() if key != "name"}) + out["fun"] = scoring[1] return out if scoring is None: diff --git a/python/naiveautoml/algorithm_selection/_sklearn_factory.py b/python/naiveautoml/algorithm_selection/_sklearn_factory.py index afac376..897372e 100644 --- a/python/naiveautoml/algorithm_selection/_sklearn_factory.py +++ b/python/naiveautoml/algorithm_selection/_sklearn_factory.py @@ -663,7 +663,7 @@ def score_func(X, y): return sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis(**params) if clazz == sklearn.linear_model.LogisticRegression: - params["dual"] = check_for_bool(params["dual"]) + # params["dual"] = check_for_bool(params["dual"]) -- disabled now return sklearn.linear_model.LogisticRegression(**params) if clazz == sklearn.neural_network.MLPClassifier: diff --git a/python/naiveautoml/naiveautoml.py b/python/naiveautoml/naiveautoml.py index e6f18a2..aa21df8 100644 --- a/python/naiveautoml/naiveautoml.py +++ b/python/naiveautoml/naiveautoml.py @@ -234,8 +234,11 @@ def fit(self, X, y, categorical_features=None): # get candidate descriptor as_result_for_best_candidate = relevant_history.sort_values(self.task.scoring["name"]).iloc[-1] + config_space = self.algorithm_selector.get_config_space(as_result_for_best_candidate) - if ( + if len(config_space) == 0: + self.logger.info(f"The selected algorithms {as_result_for_best_candidate} have no hyperparameters.") + elif ( deadline is None or deadline is not None and deadline - time.time() >= as_result_for_best_candidate["runtime"] + 5 ): @@ -244,7 +247,7 @@ def fit(self, X, y, categorical_features=None): self.hp_optimizer.reset( task=self.task, runtime_of_default_config=as_result_for_best_candidate["runtime"], - config_space=self.algorithm_selector.get_config_space(as_result_for_best_candidate), + config_space=config_space, 
history_descriptor_creation_fun=lambda hp_config: self.algorithm_selector.create_history_descriptor( as_result_for_best_candidate, hp_config diff --git a/python/naiveautoml/searchspace-classification.json b/python/naiveautoml/searchspace-classification.json index 88364c7..3aa2c4a 100644 --- a/python/naiveautoml/searchspace-classification.json +++ b/python/naiveautoml/searchspace-classification.json @@ -42,7 +42,7 @@ "log": false, "lower": 10, "upper": 2000, - "default": 1000 + "default_value": 1000 }, { "name": "output_distribution", @@ -51,7 +51,7 @@ "uniform", "normal" ], - "default": "uniform", + "default_value": "uniform", "probabilities": null } ], @@ -71,7 +71,7 @@ "log": false, "lower": 0.7, "upper": 0.999, - "default": 0.75 + "default_value": 0.75 }, { "name": "q_min", @@ -79,7 +79,7 @@ "log": false, "lower": 0.001, "upper": 0.3, - "default": 0.25 + "default_value": 0.25 } ], "conditions": [], @@ -124,7 +124,7 @@ "parallel", "deflation" ], - "default": "parallel", + "default_value": "parallel", "probabilities": null }, { @@ -135,7 +135,7 @@ "exp", "cube" ], - "default": "logcosh", + "default_value": "logcosh", "probabilities": null }, { @@ -146,7 +146,7 @@ "arbitrary-variance", "unit-variance" ], - "default": "False", + "default_value": "False", "probabilities": null }, { @@ -155,7 +155,7 @@ "log": false, "lower": 10, "upper": 2000, - "default": 100 + "default_value": 100 } ], "conditions": [ @@ -185,7 +185,7 @@ "manhattan", "cosine" ], - "default": "euclidean", + "default_value": "euclidean", "probabilities": null }, { @@ -196,7 +196,7 @@ "complete", "average" ], - "default": "ward", + "default_value": "ward", "probabilities": null }, { @@ -205,7 +205,7 @@ "log": false, "lower": 2, "upper": 400, - "default": 25 + "default_value": 25 }, { "name": "pooling_func", @@ -215,7 +215,7 @@ "median", "max" ], - "default": "mean", + "default_value": "mean", "probabilities": null } ], @@ -260,7 +260,7 @@ "sigmoid", "cosine" ], - "default": "rbf", + "default_value": 
"rbf", "probabilities": null }, { @@ -269,7 +269,7 @@ "log": false, "lower": 10, "upper": 2000, - "default": 100 + "default_value": 100 }, { "name": "coef0", @@ -277,7 +277,7 @@ "log": false, "lower": -1.0, "upper": 1.0, - "default": 0.0 + "default_value": 0.0 }, { "name": "degree", @@ -285,7 +285,7 @@ "log": false, "lower": 2, "upper": 5, - "default": 3 + "default_value": 3 }, { "name": "gamma", @@ -293,7 +293,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 0.01 + "default_value": 0.01 } ], "conditions": [ @@ -337,7 +337,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "n_components", @@ -345,7 +345,7 @@ "log": true, "lower": 50, "upper": 10000, - "default": 100 + "default_value": 100 } ], "conditions": [], @@ -368,7 +368,7 @@ "cosine", "chi2" ], - "default": "rbf", + "default_value": "rbf", "probabilities": null }, { @@ -377,7 +377,7 @@ "log": true, "lower": 50, "upper": 10000, - "default": 100 + "default_value": 100 }, { "name": "coef0", @@ -385,7 +385,7 @@ "log": false, "lower": -1.0, "upper": 1.0, - "default": 0.0 + "default_value": 0.0 }, { "name": "degree", @@ -393,7 +393,7 @@ "log": false, "lower": 2, "upper": 5, - "default": 3 + "default_value": 3 }, { "name": "gamma", @@ -401,7 +401,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 0.1 + "default_value": 0.1 } ], "conditions": [ @@ -447,7 +447,7 @@ "log": false, "lower": 0.5, "upper": 0.9999, - "default": 0.9999 + "default_value": 0.9999 }, { "name": "whiten", @@ -456,7 +456,7 @@ "False", "True" ], - "default": "False", + "default_value": "False", "probabilities": null } ], @@ -476,7 +476,7 @@ "log": false, "lower": 2, "upper": 3, - "default": 2 + "default_value": 2 }, { "name": "include_bias", @@ -485,7 +485,7 @@ "True", "False" ], - "default": "True", + "default_value": "True", "probabilities": null }, { @@ -495,7 +495,7 @@ "False", "True" ], - "default": "False", + "default_value": "False", 
"probabilities": null } ], @@ -515,7 +515,7 @@ "log": false, "lower": 1.0, "upper": 99.0, - "default": 50.0 + "default_value": 50.0 }, { "name": "score_func", @@ -525,7 +525,7 @@ "f_classif", "mutual_info" ], - "default": "chi2", + "default_value": "chi2", "probabilities": null } ], @@ -545,7 +545,7 @@ "log": false, "lower": 0.01, "upper": 0.5, - "default": 0.1 + "default_value": 0.1 }, { "name": "score_func", @@ -555,7 +555,7 @@ "f_classif", "mutual_info_classif" ], - "default": "chi2", + "default_value": "chi2", "probabilities": null }, { @@ -566,7 +566,7 @@ "fdr", "fwe" ], - "default": "fpr", + "default_value": "fpr", "probabilities": null } ], @@ -599,7 +599,7 @@ "True", "False" ], - "default": "False", + "default_value": "False", "probabilities": null }, { @@ -609,7 +609,7 @@ "gini", "entropy" ], - "default": "gini", + "default_value": "gini", "probabilities": null }, { @@ -623,7 +623,7 @@ "log": false, "lower": 0.0, "upper": 1.0, - "default": 0.5 + "default_value": 0.5 }, { "name": "max_leaf_nodes", @@ -641,7 +641,7 @@ "log": false, "lower": 1, "upper": 20, - "default": 1 + "default_value": 1 }, { "name": "min_samples_split", @@ -649,7 +649,7 @@ "log": false, "lower": 2, "upper": 20, - "default": 2 + "default_value": 2 }, { "name": "min_weight_fraction_leaf", @@ -674,7 +674,7 @@ "True", "False" ], - "default": "True", + "default_value": "True", "probabilities": null }, { @@ -684,7 +684,7 @@ "gini", "entropy" ], - "default": "gini", + "default_value": "gini", "probabilities": null }, { @@ -698,7 +698,7 @@ "log": false, "lower": 0.0, "upper": 1.0, - "default": 0.5 + "default_value": 0.5 }, { "name": "max_leaf_nodes", @@ -716,7 +716,7 @@ "log": false, "lower": 1, "upper": 20, - "default": 1 + "default_value": 1 }, { "name": "min_samples_split", @@ -724,7 +724,7 @@ "log": false, "lower": 2, "upper": 20, - "default": 2 + "default_value": 2 }, { "name": "min_weight_fraction_leaf", @@ -750,7 +750,7 @@ "valid", "train" ], - "default": "off", + "default_value": "off", 
"probabilities": null }, { @@ -759,7 +759,7 @@ "log": true, "lower": 1e-10, "upper": 1.0, - "default": 1e-10 + "default_value": 1e-10 }, { "name": "learning_rate", @@ -767,7 +767,7 @@ "log": true, "lower": 0.01, "upper": 1.0, - "default": 0.1 + "default_value": 0.1 }, { "name": "loss", @@ -790,7 +790,7 @@ "log": true, "lower": 3, "upper": 2047, - "default": 31 + "default_value": 31 }, { "name": "min_samples_leaf", @@ -798,7 +798,7 @@ "log": true, "lower": 1, "upper": 200, - "default": 20 + "default_value": 20 }, { "name": "scoring", @@ -816,7 +816,7 @@ "log": false, "lower": 1, "upper": 20, - "default": 10 + "default_value": 10 }, { "name": "validation_fraction", @@ -824,7 +824,7 @@ "log": false, "lower": 0.01, "upper": 0.4, - "default": 0.1 + "default_value": 0.1 } ], "conditions": [ @@ -859,7 +859,7 @@ "log": true, "lower": 0.01, "upper": 100.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "fit_prior", @@ -868,7 +868,7 @@ "True", "False" ], - "default": "True", + "default_value": "True", "probabilities": null } ], @@ -889,7 +889,7 @@ "gini", "entropy" ], - "default": "gini", + "default_value": "gini", "probabilities": null }, { @@ -898,7 +898,7 @@ "log": false, "lower": 0.0, "upper": 2.0, - "default": 0.5 + "default_value": 0.5 }, { "name": "max_features", @@ -921,7 +921,7 @@ "log": false, "lower": 1, "upper": 20, - "default": 1 + "default_value": 1 }, { "name": "min_samples_split", @@ -929,7 +929,7 @@ "log": false, "lower": 2, "upper": 20, - "default": 2 + "default_value": 2 }, { "name": "min_weight_fraction_leaf", @@ -963,7 +963,7 @@ "log": true, "lower": 1, "upper": 100, - "default": 1 + "default_value": 1 }, { "name": "p", @@ -972,7 +972,7 @@ 1, 2 ], - "default": 2, + "default_value": 2, "probabilities": null }, { @@ -982,7 +982,7 @@ "uniform", "distance" ], - "default": "uniform", + "default_value": "uniform", "probabilities": null } ], @@ -1004,7 +1004,7 @@ "auto", "manual" ], - "default": "None", + "default_value": "None", "probabilities": null }, 
{ @@ -1013,7 +1013,7 @@ "log": true, "lower": 1e-05, "upper": 0.1, - "default": 0.0001 + "default_value": 0.0001 }, { "name": "shrinkage_factor", @@ -1021,7 +1021,7 @@ "log": false, "lower": 0.0, "upper": 1.0, - "default": 0.5 + "default_value": 0.5 } ], "conditions": [ @@ -1047,7 +1047,7 @@ "log": false, "lower": 0.0, "upper": 1.0, - "default": 0.0 + "default_value": 0.0 } ], "conditions": [], @@ -1066,7 +1066,7 @@ "log": true, "lower": 0.03125, "upper": 32768.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "gamma", @@ -1074,7 +1074,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 0.1 + "default_value": 0.1 }, { "name": "kernel", @@ -1093,7 +1093,7 @@ "True", "False" ], - "default": "True", + "default_value": "True", "probabilities": null }, { @@ -1102,7 +1102,7 @@ "log": true, "lower": 1e-05, "upper": 0.1, - "default": 0.001 + "default_value": 0.001 } ], "conditions": [], @@ -1121,7 +1121,7 @@ "log": true, "lower": 0.03125, "upper": 32768.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "gamma", @@ -1129,7 +1129,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 0.1 + "default_value": 0.1 }, { "name": "kernel", @@ -1148,7 +1148,7 @@ "True", "False" ], - "default": "True", + "default_value": "True", "probabilities": null }, { @@ -1157,7 +1157,7 @@ "log": true, "lower": 1e-05, "upper": 0.1, - "default": 0.001 + "default_value": 0.001 } ], "conditions": [], @@ -1176,7 +1176,7 @@ "log": true, "lower": 0.03125, "upper": 32768.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "coef0", @@ -1184,7 +1184,7 @@ "log": false, "lower": -1.0, "upper": 1.0, - "default": 0.0 + "default_value": 0.0 }, { "name": "degree", @@ -1192,7 +1192,7 @@ "log": false, "lower": 2, "upper": 5, - "default": 3 + "default_value": 3 }, { "name": "gamma", @@ -1200,7 +1200,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 0.1 + "default_value": 0.1 }, { "name": "kernel", @@ -1219,7 +1219,7 @@ "True", "False" ], - 
"default": "True", + "default_value": "True", "probabilities": null }, { @@ -1228,7 +1228,7 @@ "log": true, "lower": 1e-05, "upper": 0.1, - "default": 0.001 + "default_value": 0.001 } ], "conditions": [], @@ -1247,7 +1247,7 @@ "log": true, "lower": 0.03125, "upper": 32768.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "coef0", @@ -1255,7 +1255,7 @@ "log": false, "lower": -1.0, "upper": 1.0, - "default": 0.0 + "default_value": 0.0 }, { "name": "gamma", @@ -1263,7 +1263,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 0.1 + "default_value": 0.1 }, { "name": "kernel", @@ -1282,7 +1282,7 @@ "True", "False" ], - "default": "True", + "default_value": "True", "probabilities": null }, { @@ -1291,7 +1291,7 @@ "log": true, "lower": 1e-05, "upper": 0.1, - "default": 0.001 + "default_value": 0.001 } ], "conditions": [], @@ -1304,33 +1304,13 @@ "class": "sklearn.linear_model.LogisticRegression", "params": { "hyperparameters": [ - { - "name": "penalty", - "type": "categorical", - "choices": [ - "l1", - "l2", - "elasticnet", - "None" - ], - "default": "l2" - }, - { - "name": "dual", - "type": "categorical", - "choices": [ - "True", - "False" - ], - "default": "False" - }, { "name": "C", "type": "uniform_float", "log": true, "lower": 0.03125, "upper": 32768.0, - "default": 1.0 + "default_value": 1.0 } ], "conditions": [], @@ -1348,7 +1328,7 @@ "tanh", "relu" ], - "default": "relu", + "default_value": "relu", "probabilities": null }, { @@ -1357,7 +1337,7 @@ "log": true, "lower": 1e-07, "upper": 0.1, - "default": 0.0001 + "default_value": 0.0001 }, { "name": "batch_size", @@ -1381,7 +1361,7 @@ "valid", "train" ], - "default": "valid", + "default_value": "valid", "probabilities": null }, { @@ -1395,7 +1375,7 @@ "log": false, "lower": 1, "upper": 3, - "default": 1 + "default_value": 1 }, { "name": "learning_rate_init", @@ -1403,7 +1383,7 @@ "log": true, "lower": 0.0001, "upper": 0.5, - "default": 0.001 + "default_value": 0.001 }, { "name": 
"n_iter_no_change", @@ -1416,7 +1396,7 @@ "log": true, "lower": 16, "upper": 264, - "default": 32 + "default_value": 32 }, { "name": "shuffle", @@ -1464,7 +1444,7 @@ "log": true, "lower": 0.01, "upper": 100.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "fit_prior", @@ -1473,7 +1453,7 @@ "True", "False" ], - "default": "True", + "default_value": "True", "probabilities": null } ], diff --git a/python/naiveautoml/searchspace-regression.json b/python/naiveautoml/searchspace-regression.json index 9f29b7b..67304af 100644 --- a/python/naiveautoml/searchspace-regression.json +++ b/python/naiveautoml/searchspace-regression.json @@ -42,7 +42,7 @@ "log": false, "lower": 10, "upper": 2000, - "default": 1000 + "default_value": 1000 }, { "name": "output_distribution", @@ -51,7 +51,7 @@ "uniform", "normal" ], - "default": "uniform", + "default_value": "uniform", "probabilities": null } ], @@ -71,7 +71,7 @@ "log": false, "lower": 0.7, "upper": 0.999, - "default": 0.75 + "default_value": 0.75 }, { "name": "q_min", @@ -79,7 +79,7 @@ "log": false, "lower": 0.001, "upper": 0.3, - "default": 0.25 + "default_value": 0.25 } ], "conditions": [], @@ -124,7 +124,7 @@ "parallel", "deflation" ], - "default": "parallel", + "default_value": "parallel", "probabilities": null }, { @@ -135,7 +135,7 @@ "exp", "cube" ], - "default": "logcosh", + "default_value": "logcosh", "probabilities": null }, { @@ -146,7 +146,7 @@ "arbitrary-variance", "unit-variance" ], - "default": "False", + "default_value": "False", "probabilities": null }, { @@ -155,7 +155,7 @@ "log": false, "lower": 10, "upper": 2000, - "default": 100 + "default_value": 100 } ], "conditions": [ @@ -185,7 +185,7 @@ "l1", "l2" ], - "default": "euclidean", + "default_value": "euclidean", "probabilities": null }, { @@ -196,7 +196,7 @@ "complete", "average" ], - "default": "ward", + "default_value": "ward", "probabilities": null }, { @@ -205,7 +205,7 @@ "log": false, "lower": 2, "upper": 400, - "default": 25 + "default_value": 25 }, 
{ "name": "pooling_func", @@ -215,7 +215,7 @@ "median", "max" ], - "default": "mean", + "default_value": "mean", "probabilities": null } ], @@ -260,7 +260,7 @@ "sigmoid", "cosine" ], - "default": "rbf", + "default_value": "rbf", "probabilities": null }, { @@ -269,7 +269,7 @@ "log": false, "lower": 10, "upper": 2000, - "default": 100 + "default_value": 100 }, { "name": "coef0", @@ -277,7 +277,7 @@ "log": false, "lower": -1.0, "upper": 1.0, - "default": 0.0 + "default_value": 0.0 }, { "name": "degree", @@ -285,7 +285,7 @@ "log": false, "lower": 2, "upper": 5, - "default": 3 + "default_value": 3 }, { "name": "gamma", @@ -293,7 +293,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 0.01 + "default_value": 0.01 } ], "conditions": [ @@ -337,7 +337,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "n_components", @@ -345,7 +345,7 @@ "log": true, "lower": 50, "upper": 10000, - "default": 100 + "default_value": 100 } ], "conditions": [], @@ -368,7 +368,7 @@ "cosine", "chi2" ], - "default": "rbf", + "default_value": "rbf", "probabilities": null }, { @@ -377,7 +377,7 @@ "log": true, "lower": 50, "upper": 10000, - "default": 100 + "default_value": 100 }, { "name": "coef0", @@ -385,7 +385,7 @@ "log": false, "lower": -1.0, "upper": 1.0, - "default": 0.0 + "default_value": 0.0 }, { "name": "degree", @@ -393,7 +393,7 @@ "log": false, "lower": 2, "upper": 5, - "default": 3 + "default_value": 3 }, { "name": "gamma", @@ -401,7 +401,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 0.1 + "default_value": 0.1 } ], "conditions": [ @@ -447,7 +447,7 @@ "log": false, "lower": 0.5, "upper": 0.9999, - "default": 0.9999 + "default_value": 0.9999 }, { "name": "whiten", @@ -456,7 +456,7 @@ "False", "True" ], - "default": "False", + "default_value": "False", "probabilities": null } ], @@ -476,7 +476,7 @@ "log": false, "lower": 2, "upper": 3, - "default": 2 + "default_value": 2 }, { "name": 
"include_bias", @@ -485,7 +485,7 @@ "True", "False" ], - "default": "True", + "default_value": "True", "probabilities": null }, { @@ -495,7 +495,7 @@ "False", "True" ], - "default": "False", + "default_value": "False", "probabilities": null } ], @@ -521,7 +521,7 @@ "True", "False" ], - "default": "False", + "default_value": "False", "probabilities": null }, { @@ -533,7 +533,7 @@ "absolute_error", "squared_error" ], - "default": "squared_error", + "default_value": "squared_error", "probabilities": null }, { @@ -547,7 +547,7 @@ "log": false, "lower": 0.1, "upper": 1.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "max_leaf_nodes", @@ -565,7 +565,7 @@ "log": false, "lower": 1, "upper": 20, - "default": 1 + "default_value": 1 }, { "name": "min_samples_split", @@ -573,7 +573,7 @@ "log": false, "lower": 2, "upper": 20, - "default": 2 + "default_value": 2 }, { "name": "min_weight_fraction_leaf", @@ -598,7 +598,7 @@ "True", "False" ], - "default": "True", + "default_value": "True", "probabilities": null }, { @@ -610,7 +610,7 @@ "poisson", "absolute_error" ], - "default": "squared_error", + "default_value": "squared_error", "probabilities": null }, { @@ -624,7 +624,7 @@ "log": false, "lower": 0.1, "upper": 1.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "max_leaf_nodes", @@ -642,7 +642,7 @@ "log": false, "lower": 1, "upper": 20, - "default": 1 + "default_value": 1 }, { "name": "min_samples_split", @@ -650,7 +650,7 @@ "log": false, "lower": 2, "upper": 20, - "default": 2 + "default_value": 2 }, { "name": "min_weight_fraction_leaf", @@ -676,7 +676,7 @@ "valid", "train" ], - "default": "off", + "default_value": "off", "probabilities": null }, { @@ -685,7 +685,7 @@ "log": true, "lower": 1e-10, "upper": 1.0, - "default": 1e-10 + "default_value": 1e-10 }, { "name": "learning_rate", @@ -693,7 +693,7 @@ "log": true, "lower": 0.01, "upper": 1.0, - "default": 0.1 + "default_value": 0.1 }, { "name": "loss", @@ -705,7 +705,7 @@ "gamma", "squared_error" ], - "default": 
"squared_error", + "default_value": "squared_error", "probabilities": null }, { @@ -724,7 +724,7 @@ "log": true, "lower": 3, "upper": 2047, - "default": 31 + "default_value": 31 }, { "name": "min_samples_leaf", @@ -732,7 +732,7 @@ "log": true, "lower": 1, "upper": 200, - "default": 20 + "default_value": 20 }, { "name": "scoring", @@ -750,7 +750,7 @@ "log": false, "lower": 1, "upper": 20, - "default": 10 + "default_value": 10 }, { "name": "validation_fraction", @@ -758,7 +758,7 @@ "log": false, "lower": 0.01, "upper": 0.4, - "default": 0.1 + "default_value": 0.1 }, { "name": "quantile", @@ -766,7 +766,7 @@ "log": false, "lower": 0.01, "upper": 0.99, - "default": 0.5 + "default_value": 0.5 } ], "conditions": [ @@ -810,7 +810,7 @@ "absolute_error", "squared_error" ], - "default": "squared_error", + "default_value": "squared_error", "probabilities": null }, { @@ -819,7 +819,7 @@ "log": false, "lower": 0.0, "upper": 2.0, - "default": 0.5 + "default_value": 0.5 }, { "name": "max_features", @@ -842,7 +842,7 @@ "log": false, "lower": 1, "upper": 20, - "default": 1 + "default_value": 1 }, { "name": "min_samples_split", @@ -850,7 +850,7 @@ "log": false, "lower": 2, "upper": 20, - "default": 2 + "default_value": 2 }, { "name": "min_weight_fraction_leaf", @@ -874,7 +874,7 @@ "log": false, "lower": 1e-10, "upper": 0.001, - "default": 1e-06 + "default_value": 1e-06 }, { "name": "alpha_2", @@ -882,7 +882,7 @@ "log": true, "lower": 1e-10, "upper": 0.001, - "default": 1e-06 + "default_value": 1e-06 }, { "name": "fit_intercept", @@ -895,7 +895,7 @@ "log": true, "lower": 1e-10, "upper": 0.001, - "default": 1e-06 + "default_value": 1e-06 }, { "name": "lambda_2", @@ -903,10 +903,10 @@ "log": true, "lower": 1e-10, "upper": 0.001, - "default": 1e-06 + "default_value": 1e-06 }, { - "name": "n_iter", + "name": "max_iter", "type": "constant", "value": 300 }, @@ -916,7 +916,7 @@ "log": true, "lower": 1000.0, "upper": 100000.0, - "default": 10000.0 + "default_value": 10000.0 }, { "name": 
"tol", @@ -924,7 +924,7 @@ "log": true, "lower": 1e-05, "upper": 0.1, - "default": 0.001 + "default_value": 0.001 } ], "conditions": [], @@ -943,7 +943,7 @@ "log": true, "lower": 1e-10, "upper": 1.0, - "default": 1e-08 + "default_value": 1e-08 }, { "name": "thetaL", @@ -951,7 +951,7 @@ "log": true, "lower": 1e-10, "upper": 0.001, - "default": 1e-06 + "default_value": 1e-06 }, { "name": "thetaU", @@ -959,7 +959,7 @@ "log": true, "lower": 1.0, "upper": 100000.0, - "default": 100000.0 + "default_value": 100000.0 } ], "conditions": [], @@ -978,7 +978,7 @@ "log": true, "lower": 1, "upper": 100, - "default": 1 + "default_value": 1 }, { "name": "p", @@ -987,7 +987,7 @@ 1, 2 ], - "default": 2, + "default_value": 2, "probabilities": null }, { @@ -997,7 +997,7 @@ "uniform", "distance" ], - "default": "uniform", + "default_value": "uniform", "probabilities": null } ], @@ -1017,7 +1017,7 @@ "log": true, "lower": 0.03125, "upper": 32768.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "dual", @@ -1030,7 +1030,7 @@ "log": true, "lower": 0.001, "upper": 1.0, - "default": 0.1 + "default_value": 0.1 }, { "name": "fit_intercept", @@ -1049,7 +1049,7 @@ "epsilon_insensitive", "squared_epsilon_insensitive" ], - "default": "squared_epsilon_insensitive", + "default_value": "squared_epsilon_insensitive", "probabilities": null }, { @@ -1058,7 +1058,7 @@ "log": true, "lower": 1e-05, "upper": 0.1, - "default": 0.0001 + "default_value": 0.0001 } ], "conditions": [], @@ -1094,7 +1094,7 @@ "log": true, "lower": 0.01, "upper": 2.0, - "default": 0.1 + "default_value": 0.1 }, { "name": "loss", @@ -1104,7 +1104,7 @@ "square", "exponential" ], - "default": "linear", + "default_value": "linear", "probabilities": null }, { @@ -1113,7 +1113,7 @@ "log": false, "lower": 1, "upper": 10, - "default": 1 + "default_value": 1 }, { "name": "n_estimators", @@ -1121,7 +1121,7 @@ "log": false, "lower": 50, "upper": 500, - "default": 50 + "default_value": 50 } ], "conditions": [], @@ -1140,7 +1140,7 @@ 
"log": true, "lower": 0.03125, "upper": 32768.0, - "default": 1.0 + "default_value": 1.0 }, { "name": "epsilon", @@ -1148,7 +1148,7 @@ "log": true, "lower": 0.001, "upper": 1.0, - "default": 0.1 + "default_value": 0.1 }, { "name": "kernel", @@ -1159,7 +1159,7 @@ "rbf", "sigmoid" ], - "default": "rbf", + "default_value": "rbf", "probabilities": null }, { @@ -1174,7 +1174,7 @@ "True", "False" ], - "default": "True", + "default_value": "True", "probabilities": null }, { @@ -1183,7 +1183,7 @@ "log": true, "lower": 1e-05, "upper": 0.1, - "default": 0.001 + "default_value": 0.001 }, { "name": "coef0", @@ -1191,7 +1191,7 @@ "log": false, "lower": -1.0, "upper": 1.0, - "default": 0.0 + "default_value": 0.0 }, { "name": "degree", @@ -1199,7 +1199,7 @@ "log": false, "lower": 2, "upper": 5, - "default": 3 + "default_value": 3 }, { "name": "gamma", @@ -1207,7 +1207,7 @@ "log": true, "lower": 3.0517578125e-05, "upper": 8.0, - "default": 0.1 + "default_value": 0.1 } ], "conditions": [ @@ -1256,7 +1256,7 @@ "tanh", "relu" ], - "default": "tanh", + "default_value": "tanh", "probabilities": null }, { @@ -1265,7 +1265,7 @@ "log": true, "lower": 1e-07, "upper": 0.1, - "default": 0.0001 + "default_value": 0.0001 }, { "name": "batch_size", @@ -1289,7 +1289,7 @@ "valid", "train" ], - "default": "valid", + "default_value": "valid", "probabilities": null }, { @@ -1303,7 +1303,7 @@ "log": false, "lower": 1, "upper": 3, - "default": 1 + "default_value": 1 }, { "name": "learning_rate_init", @@ -1311,7 +1311,7 @@ "log": true, "lower": 0.0001, "upper": 0.5, - "default": 0.001 + "default_value": 0.001 }, { "name": "n_iter_no_change", @@ -1324,7 +1324,7 @@ "log": true, "lower": 16, "upper": 264, - "default": 32 + "default_value": 32 }, { "name": "shuffle", @@ -1372,7 +1372,7 @@ "log": true, "lower": 1e-07, "upper": 0.1, - "default": 0.0001 + "default_value": 0.0001 }, { "name": "average", @@ -1381,7 +1381,7 @@ "False", "True" ], - "default": "False", + "default_value": "False", "probabilities": 
null }, { @@ -1397,7 +1397,7 @@ "invscaling", "constant" ], - "default": "invscaling", + "default_value": "invscaling", "probabilities": null }, { @@ -1409,7 +1409,7 @@ "epsilon_insensitive", "squared_epsilon_insensitive" ], - "default": "squared_error", + "default_value": "squared_error", "probabilities": null }, { @@ -1420,7 +1420,7 @@ "l2", "elasticnet" ], - "default": "l2", + "default_value": "l2", "probabilities": null }, { @@ -1429,7 +1429,7 @@ "log": true, "lower": 1e-05, "upper": 0.1, - "default": 0.0001 + "default_value": 0.0001 }, { "name": "epsilon", @@ -1437,7 +1437,7 @@ "log": true, "lower": 1e-05, "upper": 0.1, - "default": 0.1 + "default_value": 0.1 }, { "name": "eta0", @@ -1445,7 +1445,7 @@ "log": true, "lower": 1e-07, "upper": 0.1, - "default": 0.01 + "default_value": 0.01 }, { "name": "l1_ratio", @@ -1453,7 +1453,7 @@ "log": true, "lower": 1e-09, "upper": 1.0, - "default": 0.15 + "default_value": 0.15 }, { "name": "power_t", @@ -1461,7 +1461,7 @@ "log": false, "lower": 1e-05, "upper": 1.0, - "default": 0.25 + "default_value": 0.25 } ], "conditions": [ diff --git a/python/setup.py b/python/setup.py index b17f694..c671b1e 100644 --- a/python/setup.py +++ b/python/setup.py @@ -4,7 +4,7 @@ setup( name = 'naiveautoml', packages = find_packages(exclude=["test"]), - version = '0.1.3', + version = '0.1.5', license='MIT', description = 'Fast and Timeout-Free Automated Machine Learning for Multi-Class classification, Multi-Label classification, and regression.', author = 'Felix Mohr', @@ -12,11 +12,11 @@ url = 'https://github.com/fmohr/naiveautoml', keywords = ['AutoML', 'sklearn', 'naive', 'simple', 'multi-class', 'multi-label', 'regression', 'no timeouts'], install_requires=[ - 'numpy==1.26.4', + 'numpy<2', 'pandas', - 'scikit-learn==1.4.2', + 'scikit-learn==1.5.2', 'scikit-multilearn==0.2.0', - 'configspace<0.7.1', + 'configspace==1.2.0', 'scipy', 'pynisher', 'psutil', diff --git a/python/test/test_naiveautoml.py b/python/test/test_naiveautoml.py index 
50306e6..e057451 100644 --- a/python/test/test_naiveautoml.py +++ b/python/test/test_naiveautoml.py @@ -1,7 +1,19 @@ import logging import pytest -from sklearn.metrics import get_scorer +from sklearn.metrics import get_scorer, make_scorer +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, + RandomForestClassifier, + RandomForestRegressor, + ExtraTreesClassifier, + ExtraTreesRegressor, + AdaBoostClassifier, + AdaBoostRegressor +) +from sklearn.gaussian_process import GaussianProcessRegressor +from sklearn.neural_network import MLPClassifier, MLPRegressor import naiveautoml import numpy as np @@ -56,6 +68,55 @@ def evaluate_nb_best(pl, X, y, scoring_functions): ) +class TurboEvaluator(Callable): + + def __init__(self): + self.history = [] + + def reset(self): + self.history = [] + + def __call__(self, pl, X, y, scoring_functions): + learner = pl.steps[-1][1] + if isinstance(learner, tuple([ + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, + RandomForestClassifier, + RandomForestRegressor, + ExtraTreesClassifier, + ExtraTreesRegressor, + AdaBoostClassifier, + AdaBoostRegressor + ])): + learner.n_estimators = 2 + if isinstance(learner, tuple([ + HistGradientBoostingClassifier, + HistGradientBoostingRegressor + ])): + learner.max_iter = 10 + + elif isinstance(learner, (MLPClassifier, MLPRegressor)): + learner.max_iter = 2 + + if isinstance(learner, GaussianProcessRegressor): + learner.n_restarts_optimizer=1 + + + X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(X, y, train_size=80, test_size=50) + learner = sklearn.base.clone(pl).fit(X_train, y_train) + results = { + s["name"]: s["fun"](learner, X_val, y_val) + for s in scoring_functions + } + evaluation_report = { + s["name"]: {} for s in scoring_functions + } + return results, evaluation_report + + def update(self, pl, results): + self.history.append([pl, results]) + + class TestNaiveAutoML(unittest.TestCase): @staticmethod @@ 
-272,37 +333,39 @@ def test_constant_algorithms_in_hpo_phase(self): X, y = get_dataset(61) # run naml - np.random.seed(round(time.time())) + np.random.seed(0)#round(time.time())) naml = naiveautoml.NaiveAutoML( logger_name="naml", timeout_overall=60, max_hpo_iterations=10, show_progress=True, - evaluation_fun=evaluate_randomly + evaluation_fun=evaluate_randomly, + random_state=0 ) naml.fit(X, y) print(naml.history[["learner_class", "neg_log_loss"]]) # check that there is only one combination of algorithms in the HPO phase history = naml.history.iloc[naml.steps_after_which_algorithm_selection_was_completed:] - self.assertTrue(len(pd.unique(history["learner_class"])) == 1) - self.assertTrue(len(pd.unique(history["data-pre-processor_class"])) == 1) - self.assertTrue(len(pd.unique(history["feature-pre-processor_class"])) == 1) - - # get best solution from phase 1 - phase_1_solutions = naml.history.iloc[:naml.steps_after_which_algorithm_selection_was_completed] - phase_1_solutions = phase_1_solutions[phase_1_solutions[naml.task.scoring["name"]].notna()] - best_solution_in_phase_1 = phase_1_solutions.sort_values(naml.task.scoring["name"]).iloc[-1] - - for step in ["data-pre-processor", "feature-pre-processor", "learner"]: - field = f"{step}_class" - class_in_phase1 = best_solution_in_phase_1[field] - class_in_phase2 = pd.unique(history[field])[0] - self.assertEqual( - class_in_phase1, - class_in_phase2, - f"Choice for {step} should conicide but is {class_in_phase1} in AS phase and {class_in_phase2} in HPO." 
- ) + if len(history) > 0: + self.assertTrue(len(pd.unique(history["learner_class"])) == 1) + self.assertTrue(len(pd.unique(history["data-pre-processor_class"])) == 1) + self.assertTrue(len(pd.unique(history["feature-pre-processor_class"])) == 1) + + # get best solution from phase 1 + phase_1_solutions = naml.history.iloc[:naml.steps_after_which_algorithm_selection_was_completed] + phase_1_solutions = phase_1_solutions[phase_1_solutions[naml.task.scoring["name"]].notna()] + best_solution_in_phase_1 = phase_1_solutions.sort_values(naml.task.scoring["name"]).iloc[-1] + + for step in ["data-pre-processor", "feature-pre-processor", "learner"]: + field = f"{step}_class" + class_in_phase1 = best_solution_in_phase_1[field] + class_in_phase2 = pd.unique(history[field])[0] + self.assertEqual( + class_in_phase1, + class_in_phase2, + f"Choice for {step} should conicide but is {class_in_phase1} in AS phase and {class_in_phase2} in HPO." + ) """ @@ -490,21 +553,24 @@ def test_individual_scoring(self, openmlid, exp_runtime, exp_result): X, y = get_dataset(openmlid) self.logger.info(f"Testing individual scoring function on dataset {openml}") - scoring1 = { - "name": "accuracy", - "score_func": lambda y, y_pred: np.count_nonzero(y == y_pred) / len(y), - "greater_is_better": True, - "needs_proba": False, - "needs_threshold": False - } - scoring2 = { - "name": "errorrate", - "score_func": lambda y, y_pred: np.count_nonzero(y != y_pred) / len(y), - "greater_is_better": False, - "needs_proba": False, - "needs_threshold": False - } - scorer = sklearn.metrics.make_scorer(**{k: v for k, v in scoring1.items() if k != "name"}) + scoring1 = ( + "accuracy", + make_scorer( + score_func=lambda y, y_pred: np.count_nonzero(y == y_pred) / len(y), + greater_is_better=True, + response_method="predict" + ) + ) + + scoring2 = ( + "errorrate", + make_scorer( + score_func=lambda y, y_pred: np.count_nonzero(y != y_pred) / len(y), + greater_is_better=False, + response_method="predict" + ) + ) + scorer = 
scoring1[1] # run naml scores = [] @@ -533,6 +599,7 @@ def test_individual_scoring(self, openmlid, exp_runtime, exp_result): # compute test performance self.logger.debug(f"finished training on seed {seed} after {int(np.round(runtime))}s. Now computing performance of solution.") + print(scorer) score = scorer(naml, X_test, y_test) scores.append(score) self.logger.debug(f"finished test on seed {seed}. Test score for this run is {score}") @@ -654,7 +721,7 @@ def update(self, pl, results): def test_searchspaces(self): for openmlid, task_type in { - #61: "classification", # iris + 61: "classification", # iris 531: "regression" # boston housing }.items(): @@ -667,8 +734,7 @@ def test_searchspaces(self): task_type=task_type, scoring=scoring, timeout_candidate=2, - evaluation_fun="mccv", - kwargs_evaluation_fun={"n_splits": 1} + evaluation_fun=TurboEvaluator() ) task = naml.get_task_from_data(X, y, None) naml.reset(task) @@ -701,10 +767,15 @@ def test_searchspaces(self): }) # get HPO process for supposed selection + config_space = helper.get_config_space_for_selected_algorithms(selection) + if len(config_space) == 0: + self.logger.info("Config space is empty, nothing to check.") + continue + hp_optimizer.reset( task=task, runtime_of_default_config=0, - config_space=helper.get_config_space_for_selected_algorithms(selection), + config_space=config_space, history_descriptor_creation_fun=lambda hp_config: naml.algorithm_selector.create_history_descriptor(faked_as_info, hp_config), evaluator=naml.evaluator, is_pipeline_forbidden=naml.algorithm_selector.is_pipeline_forbidden, @@ -728,7 +799,8 @@ def test_searchspaces(self): "There are significant negative eigenvalues", "ValueError: array must not contain infs or NaNs", "ValueError: Input X contains infinity or a value too large for", - "ValueError: illegal value in 4th argument of internal gesdd" + "ValueError: illegal value in 4th argument of internal gesdd", + "ValueError: Found array with 0 feature(s)" ] if not any([t in 
exception for t in allowed_exception_texts]): self.logger.exception(exception) @@ -745,7 +817,12 @@ def test_process_leak(self, openmlid): X, y = get_dataset(openmlid) self.logger.info(f"Start test of individual stateful evaluation function on dataset {openmlid}.") - X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.8) + X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( + X, + y, + train_size=10, + test_size=10 + ) for i in range(1, 21): self.logger.info(f"Run {i}-th instance") automl = naiveautoml.NaiveAutoML( diff --git a/python/usage-example.ipynb b/python/usage-example.ipynb index 088fd01..aa59c10 100644 --- a/python/usage-example.ipynb +++ b/python/usage-example.ipynb @@ -2,10 +2,62 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "42bd5e12", - "metadata": {}, - "outputs": [], + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: naiveautoml in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (0.1.3)\n", + "Requirement already satisfied: numpy<2 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (1.26.4)\n", + "Requirement already satisfied: pandas in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (2.2.3)\n", + "Requirement already satisfied: scikit-learn==1.5.2 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (1.5.2)\n", + "Requirement already satisfied: scikit-multilearn==0.2.0 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (0.2.0)\n", + "Requirement already satisfied: configspace==1.2.0 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (1.2.0)\n", + "Requirement already satisfied: scipy in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) 
(1.14.1)\n", + "Requirement already satisfied: pynisher in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (1.0.10)\n", + "Requirement already satisfied: psutil in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (6.0.0)\n", + "Requirement already satisfied: tqdm in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (4.66.5)\n", + "Requirement already satisfied: parameterized in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (0.9.0)\n", + "Requirement already satisfied: openml in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (0.14.2)\n", + "Requirement already satisfied: lccv in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from naiveautoml) (0.2.2)\n", + "Requirement already satisfied: pyparsing in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from configspace==1.2.0->naiveautoml) (3.1.4)\n", + "Requirement already satisfied: typing-extensions in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from configspace==1.2.0->naiveautoml) (4.12.2)\n", + "Requirement already satisfied: more-itertools in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from configspace==1.2.0->naiveautoml) (10.5.0)\n", + "Requirement already satisfied: joblib>=1.2.0 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from scikit-learn==1.5.2->naiveautoml) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from scikit-learn==1.5.2->naiveautoml) (3.5.0)\n", + "Requirement already satisfied: matplotlib in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from lccv->naiveautoml) (3.9.2)\n", + "Requirement already satisfied: liac-arff>=2.4.0 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from openml->naiveautoml) (2.5.0)\n", + "Requirement already satisfied: 
xmltodict in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from openml->naiveautoml) (0.13.0)\n", + "Requirement already satisfied: requests in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from openml->naiveautoml) (2.32.3)\n", + "Requirement already satisfied: python-dateutil in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from openml->naiveautoml) (2.9.0.post0)\n", + "Requirement already satisfied: minio in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from openml->naiveautoml) (7.2.8)\n", + "Requirement already satisfied: pyarrow in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from openml->naiveautoml) (17.0.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from pandas->naiveautoml) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from pandas->naiveautoml) (2024.2)\n", + "Requirement already satisfied: six>=1.5 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from python-dateutil->openml->naiveautoml) (1.16.0)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from matplotlib->lccv->naiveautoml) (1.3.0)\n", + "Requirement already satisfied: cycler>=0.10 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from matplotlib->lccv->naiveautoml) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from matplotlib->lccv->naiveautoml) (4.54.0)\n", + "Requirement already satisfied: kiwisolver>=1.3.1 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from matplotlib->lccv->naiveautoml) (1.4.7)\n", + "Requirement already satisfied: packaging>=20.0 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from matplotlib->lccv->naiveautoml) (24.1)\n", + 
"Requirement already satisfied: pillow>=8 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from matplotlib->lccv->naiveautoml) (10.4.0)\n", + "Requirement already satisfied: certifi in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from minio->openml->naiveautoml) (2024.8.30)\n", + "Requirement already satisfied: urllib3 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from minio->openml->naiveautoml) (2.2.3)\n", + "Requirement already satisfied: argon2-cffi in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from minio->openml->naiveautoml) (23.1.0)\n", + "Requirement already satisfied: pycryptodome in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from minio->openml->naiveautoml) (3.20.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from requests->openml->naiveautoml) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from requests->openml->naiveautoml) (3.10)\n", + "Requirement already satisfied: argon2-cffi-bindings in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from argon2-cffi->minio->openml->naiveautoml) (21.2.0)\n", + "Requirement already satisfied: cffi>=1.0.1 in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from argon2-cffi-bindings->argon2-cffi->minio->openml->naiveautoml) (1.17.1)\n", + "Requirement already satisfied: pycparser in /home/felix/anaconda3/envs/naml/lib/python3.12/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->minio->openml->naiveautoml) (2.22)\n" + ] + } + ], "source": [ "!pip install naiveautoml" ] @@ -35,7 +87,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [00:02<00:00, 
11.46it/s]\n" + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:07<00:00, 4.12it/s]\n" ] }, { @@ -49,7 +101,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 22.73it/s]\n" + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:24<00:00, 2.49s/it]\n" ] } ], @@ -476,13 +528,10 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
Pipeline(steps=[('data-pre-processor', MinMaxScaler()),\n",
-       "                ('learner', LinearDiscriminantAnalysis())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
Pipeline(steps=[('learner', ExtraTreesClassifier())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ - "Pipeline(steps=[('data-pre-processor', MinMaxScaler()),\n", - " ('learner', LinearDiscriminantAnalysis())])" + "Pipeline(steps=[('learner', ExtraTreesClassifier())])" ] }, "execution_count": 3, @@ -544,74 +593,74 @@ " \n", " \n", " 0\n", - " 15\n", - " 1.716840e+09\n", - " 0.038556\n", - " (MinMaxScaler(), LinearDiscriminantAnalysis())\n", + " 0\n", + " 1.728326e+09\n", + " 0.522395\n", + " ((ExtraTreeClassifier(random_state=1959646086)...\n", " True\n", - " -0.0256\n", + " -0.0570\n", " True\n", - " {'neg_log_loss': [-0.002495394501050993, -0.02...\n", + " {'neg_log_loss': [-0.035560745675446245, -0.03...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", + " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", " None\n", " \n", " \n", " 1\n", - " 22\n", - " 1.716840e+09\n", - " 0.076023\n", - " (MinMaxScaler(), FastICA(), LinearDiscriminant...\n", + " 8\n", + " 1.728326e+09\n", + " 0.028414\n", + " (QuadraticDiscriminantAnalysis())\n", " True\n", - " -0.0290\n", + " -0.0648\n", " False\n", - " {'neg_log_loss': [-0.021755102475042885, -0.08...\n", + " {'neg_log_loss': [-0.075598137132969, -0.08836...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", - " sklearn.decomposition._fastica.FastICA\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", + " None\n", + " None\n", + " sklearn.discriminant_analysis.QuadraticDiscrim...\n", " None\n", " \n", " \n", " 2\n", - " 20\n", - " 1.716840e+09\n", - " 0.037481\n", - " (StandardScaler(), LinearDiscriminantAnalysis())\n", + " 5\n", + " 1.728326e+09\n", + " 0.021561\n", + " (GaussianNB())\n", " True\n", - " -0.0320\n", + " -0.0763\n", " False\n", - " {'neg_log_loss': [-0.06935021251033252, -0.026...\n", + " {'neg_log_loss': [-0.1416176110952844, -0.0492...\n", " ok\n", " None\n", - " 
sklearn.preprocessing._data.StandardScaler\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", + " None\n", + " sklearn.naive_bayes.GaussianNB\n", " None\n", " \n", " \n", " 3\n", - " 17\n", - " 1.716840e+09\n", - " 0.073377\n", - " (PowerTransformer(), LinearDiscriminantAnalysi...\n", + " 7\n", + " 1.728326e+09\n", + " 0.030384\n", + " (LinearDiscriminantAnalysis())\n", " True\n", - " -0.0403\n", + " -0.0775\n", " False\n", - " {'neg_log_loss': [-0.019349148853344624, -0.07...\n", + " {'neg_log_loss': [-0.04503314380131569, -0.091...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.PowerTransformer\n", + " None\n", " None\n", " None\n", " None\n", @@ -620,470 +669,432 @@ " \n", " \n", " 4\n", - " 35\n", - " 1.716840e+09\n", - " 0.048369\n", - " (MinMaxScaler(), LinearDiscriminantAnalysis(sh...\n", - " False\n", - " -0.0471\n", + " 1\n", + " 1.728326e+09\n", + " 0.588323\n", + " (RandomForestClassifier())\n", " True\n", - " {'neg_log_loss': [-0.04895887982577556, -0.079...\n", + " -0.0821\n", + " False\n", + " {'neg_log_loss': [-0.04251933728837645, -0.189...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " {'shrinkage': 'auto', 'tol': 1.917751462463796...\n", + " None\n", + " sklearn.ensemble._forest.RandomForestClassifier\n", + " None\n", " \n", " \n", " 5\n", - " 40\n", - " 1.716840e+09\n", - " 0.037374\n", - " (MinMaxScaler(), LinearDiscriminantAnalysis(so...\n", - " False\n", - " -0.0499\n", + " 22\n", + " 1.728326e+09\n", + " 0.506231\n", + " (VarianceThreshold(), ExtraTreesClassifier())\n", " True\n", - " {'neg_log_loss': [-0.018734488484043823, -0.08...\n", + " -0.0890\n", + " False\n", + " {'neg_log_loss': [-0.07277377026732057, -0.074...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " None\n", " 
None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " {'shrinkage': 'None', 'tol': 0.0871508939358007}\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", + " None\n", " \n", " \n", " 6\n", - " 38\n", - " 1.716840e+09\n", - " 0.034237\n", - " (MinMaxScaler(), LinearDiscriminantAnalysis(so...\n", - " False\n", - " -0.0503\n", + " 17\n", + " 1.728326e+09\n", + " 0.575651\n", + " (Normalizer(), ExtraTreesClassifier())\n", " True\n", - " {'neg_log_loss': [-0.0765949969130555, -0.0462...\n", + " -0.0899\n", + " False\n", + " {'neg_log_loss': [-0.02478927940887918, -0.159...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", + " sklearn.preprocessing._data.Normalizer\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " {'shrinkage': 'None', 'tol': 0.000159870783723...\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", + " None\n", " \n", " \n", " 7\n", - " 33\n", - " 1.716840e+09\n", - " 0.036004\n", - " (MinMaxScaler(), LinearDiscriminantAnalysis(so...\n", - " False\n", - " -0.0510\n", + " 13\n", + " 1.728326e+09\n", + " 0.097281\n", + " (LogisticRegression())\n", " True\n", - " {'neg_log_loss': [-0.13321562481779245, -0.010...\n", + " -0.1103\n", + " False\n", + " {'neg_log_loss': [-0.09439910457891872, -0.131...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " {'shrinkage': 'None', 'tol': 0.000478226112929...\n", + " None\n", + " sklearn.linear_model.LogisticRegression\n", + " None\n", " \n", " \n", " 8\n", - " 31\n", - " 1.716840e+09\n", - " 0.048536\n", - " (MinMaxScaler(), LinearDiscriminantAnalysis(so...\n", - " False\n", - " -0.0512\n", + " 29\n", + " 1.728326e+09\n", + " 0.660204\n", + " (PolynomialFeatures(), ExtraTreesClassifier())\n", " True\n", - " {'neg_log_loss': [-0.07254237857271588, -0.027...\n", + " -0.1176\n", + " False\n", + " 
{'neg_log_loss': [-0.14511456353964386, -0.070...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", " None\n", + " sklearn.preprocessing._polynomial.PolynomialFe...\n", + " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " {'shrinkage': 'None', 'tol': 1.735826848763034...\n", " \n", " \n", " 9\n", - " 18\n", - " 1.716840e+09\n", - " 0.040574\n", - " (QuantileTransformer(), LinearDiscriminantAnal...\n", + " 23\n", + " 1.728326e+09\n", + " 0.685154\n", + " (FastICA(), ExtraTreesClassifier())\n", " True\n", - " -0.0556\n", + " -0.1258\n", " False\n", - " {'neg_log_loss': [-0.11018273067707582, -0.008...\n", + " {'neg_log_loss': [-0.1329229165317684, -0.0874...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.QuantileTransformer\n", " None\n", " None\n", + " sklearn.decomposition._fastica.FastICA\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", " None\n", " \n", " \n", " 10\n", - " 36\n", - " 1.716840e+09\n", - " 0.045721\n", - " (MinMaxScaler(), LinearDiscriminantAnalysis(sh...\n", - " False\n", - " -0.0561\n", + " 28\n", + " 1.728326e+09\n", + " 0.590059\n", + " (PCA(), ExtraTreesClassifier())\n", " True\n", - " {'neg_log_loss': [-0.0862133174270862, -0.0457...\n", + " -0.1593\n", + " False\n", + " {'neg_log_loss': [-0.18733720031337187, -0.140...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", " None\n", + " sklearn.decomposition._pca.PCA\n", + " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " {'shrinkage': 'auto', 'tol': 9.083819419770749...\n", " \n", " \n", " 11\n", - " 34\n", - " 1.716840e+09\n", - " 0.046783\n", - " (MinMaxScaler(), LinearDiscriminantAnalysis(sh...\n", + " 32\n", + " 1.728326e+09\n", + " 2.375114\n", + " 
(ExtraTreesClassifier(max_features=0.725304868...\n", " False\n", - " -0.0578\n", + " -0.1765\n", " True\n", - " {'neg_log_loss': [-0.07504199300724208, -0.032...\n", + " {'neg_log_loss': [-0.16982338373638153, -0.178...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " {'shrinkage': 'auto', 'tol': 0.000390299911886...\n", + " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", + " {'bootstrap': 'False', 'criterion': 'gini', 'm...\n", " \n", " \n", " 12\n", - " 19\n", - " 1.716840e+09\n", - " 0.045145\n", - " (RobustScaler(), LinearDiscriminantAnalysis())\n", + " 14\n", + " 1.728326e+09\n", + " 0.559326\n", + " (MLPClassifier())\n", " True\n", - " -0.0589\n", + " -0.2347\n", " False\n", - " {'neg_log_loss': [-0.06260639546974095, -0.008...\n", + " {'neg_log_loss': [-0.28821802181927886, -0.214...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.RobustScaler\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", + " None\n", + " sklearn.neural_network._multilayer_perceptron....\n", " None\n", " \n", " \n", " 13\n", " 39\n", - " 1.716840e+09\n", - " 0.036426\n", - " (MinMaxScaler(), LinearDiscriminantAnalysis(sh...\n", + " 1.728326e+09\n", + " 3.200782\n", + " (ExtraTreesClassifier(bootstrap=True, criterio...\n", " False\n", - " -0.0644\n", + " -0.2413\n", " True\n", - " {'neg_log_loss': [-0.19533490226858358, -0.055...\n", + " {'neg_log_loss': [-0.22551765074824667, -0.249...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " {'shrinkage': 'manual', 'tol': 0.0013352412850...\n", + " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", + " {'bootstrap': 'True', 'criterion': 'entropy', ...\n", " \n", " \n", " 14\n", - " 37\n", - " 1.716840e+09\n", - " 0.036773\n", - " 
(MinMaxScaler(), LinearDiscriminantAnalysis(so...\n", + " 41\n", + " 1.728326e+09\n", + " 2.047107\n", + " (ExtraTreesClassifier(max_features=0.934182480...\n", " False\n", - " -0.0663\n", + " -0.2471\n", " True\n", - " {'neg_log_loss': [-0.044188917458848724, -0.07...\n", + " {'neg_log_loss': [-0.21506280777346423, -0.261...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " {'shrinkage': 'None', 'tol': 0.04619198175661532}\n", + " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", + " {'bootstrap': 'False', 'criterion': 'gini', 'm...\n", " \n", " \n", " 15\n", - " 27\n", - " 1.716840e+09\n", - " 0.048136\n", - " (MinMaxScaler(), PCA(), LinearDiscriminantAnal...\n", - " True\n", - " -0.0677\n", + " 34\n", + " 1.728326e+09\n", + " 2.374602\n", + " (ExtraTreesClassifier(criterion='entropy', max...\n", " False\n", - " {'neg_log_loss': [-0.02622884537934961, -0.037...\n", + " -0.3156\n", + " True\n", + " {'neg_log_loss': [-0.29650689548241016, -0.325...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", - " sklearn.decomposition._pca.PCA\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", " None\n", + " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", + " {'bootstrap': 'False', 'criterion': 'entropy',...\n", " \n", " \n", " 16\n", - " 7\n", - " 1.716840e+09\n", - " 0.030830\n", - " (LinearDiscriminantAnalysis())\n", - " True\n", - " -0.0741\n", + " 2\n", + " 1.728326e+09\n", + " 0.672299\n", + " (HistGradientBoostingClassifier())\n", " True\n", - " {'neg_log_loss': [-0.05479774773701625, -0.086...\n", + " -0.3245\n", + " False\n", + " {'neg_log_loss': [-0.576429902881904, -0.23101...\n", " ok\n", " None\n", " None\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", + " sklearn.ensemble.HistGradientBoostingClassifier\n", " None\n", " 
\n", " \n", " 17\n", - " 21\n", - " 1.716840e+09\n", - " 0.040023\n", - " (VarianceThreshold(), LinearDiscriminantAnalys...\n", - " True\n", - " -0.0841\n", + " 33\n", + " 1.728326e+09\n", + " 2.957551\n", + " (ExtraTreesClassifier(bootstrap=True, criterio...\n", " False\n", - " {'neg_log_loss': [-0.09759327975387903, -0.119...\n", + " -0.4980\n", + " True\n", + " {'neg_log_loss': [-0.4845302776973054, -0.5045...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", + " {'bootstrap': 'True', 'criterion': 'entropy', ...\n", " \n", " \n", " 18\n", - " 29\n", - " 1.716840e+09\n", - " 0.049416\n", - " (MinMaxScaler(), SelectPercentile(), LinearDis...\n", + " 15\n", + " 1.728326e+09\n", + " 0.027914\n", + " (MultinomialNB())\n", " True\n", - " -0.0864\n", + " -0.5671\n", " False\n", - " {'neg_log_loss': [-0.07460611584690269, -0.082...\n", + " {'neg_log_loss': [-0.5706108218760035, -0.5635...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", - " sklearn.feature_selection._univariate_selectio...\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", + " None\n", + " None\n", + " sklearn.naive_bayes.MultinomialNB\n", " None\n", " \n", " \n", " 19\n", - " 16\n", - " 1.716840e+09\n", - " 0.053158\n", - " (Normalizer(), LinearDiscriminantAnalysis())\n", - " True\n", - " -0.0957\n", + " 37\n", + " 1.728326e+09\n", + " 1.904881\n", + " (ExtraTreesClassifier(max_features=0.187438391...\n", " False\n", - " {'neg_log_loss': [-0.008705640849406344, -0.01...\n", + " -0.6068\n", + " True\n", + " {'neg_log_loss': [-0.5887696263887042, -0.6157...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.Normalizer\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", " None\n", + " 
sklearn.ensemble._forest.ExtraTreesClassifier\n", + " {'bootstrap': 'False', 'criterion': 'gini', 'm...\n", " \n", " \n", " 20\n", - " 0\n", - " 1.716840e+09\n", - " 0.393223\n", - " (ExtraTreesClassifier())\n", - " True\n", - " -0.1086\n", + " 24\n", + " 1.728326e+09\n", + " 0.744971\n", + " (FeatureAgglomeration(), ExtraTreesClassifier())\n", " True\n", - " {'neg_log_loss': [-0.1480640892431135, -0.0363...\n", + " -0.6356\n", + " False\n", + " {'neg_log_loss': [-1.3318127497285062, -0.1355...\n", " ok\n", " None\n", " None\n", " None\n", - " None\n", + " sklearn.cluster._agglomerative.FeatureAgglomer...\n", " None\n", " sklearn.ensemble._forest.ExtraTreesClassifier\n", " None\n", " \n", " \n", " 21\n", - " 8\n", - " 1.716840e+09\n", - " 0.024728\n", - " (QuadraticDiscriminantAnalysis())\n", - " True\n", - " -0.1127\n", + " 38\n", + " 1.728326e+09\n", + " 2.679551\n", + " (ExtraTreesClassifier(bootstrap=True, max_feat...\n", " False\n", - " {'neg_log_loss': [-0.09153325016961104, -0.119...\n", + " -0.6397\n", + " True\n", + " {'neg_log_loss': [-0.638336218959347, -0.64710...\n", " ok\n", " None\n", " None\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.QuadraticDiscrim...\n", - " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", + " {'bootstrap': 'True', 'criterion': 'gini', 'ma...\n", " \n", " \n", " 22\n", - " 32\n", - " 1.716840e+09\n", - " 0.040617\n", - " (MinMaxScaler(), LinearDiscriminantAnalysis(so...\n", + " 35\n", + " 1.728326e+09\n", + " 1.824337\n", + " (ExtraTreesClassifier(criterion='entropy', max...\n", " False\n", - " -0.1186\n", + " -0.6590\n", " True\n", - " {'neg_log_loss': [-0.129130290057952, -0.20282...\n", + " {'neg_log_loss': [-0.6835637540082007, -0.6613...\n", " ok\n", " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", " None\n", " None\n", " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " {'shrinkage': 'None', 'tol': 0.011865399901662...\n", + " None\n", + " 
sklearn.ensemble._forest.ExtraTreesClassifier\n", + " {'bootstrap': 'False', 'criterion': 'entropy',...\n", " \n", " \n", " 23\n", - " 1\n", - " 1.716840e+09\n", - " 0.523188\n", - " (RandomForestClassifier())\n", - " True\n", - " -0.1224\n", + " 36\n", + " 1.728326e+09\n", + " 2.628361\n", + " (ExtraTreesClassifier(bootstrap=True, max_feat...\n", " False\n", - " {'neg_log_loss': [-0.17370315489963523, -0.142...\n", - " ok\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " sklearn.ensemble._forest.RandomForestClassifier\n", - " None\n", - " \n", - " \n", - " 24\n", - " 5\n", - " 1.716840e+09\n", - " 0.026534\n", - " (GaussianNB())\n", + " -0.7553\n", " True\n", - " -0.1362\n", - " False\n", - " {'neg_log_loss': [-0.20918447713333907, -0.069...\n", + " {'neg_log_loss': [-0.7583635062267161, -0.7650...\n", " ok\n", " None\n", " None\n", " None\n", " None\n", " None\n", - " sklearn.naive_bayes.GaussianNB\n", - " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", + " {'bootstrap': 'True', 'criterion': 'gini', 'ma...\n", " \n", " \n", - " 25\n", - " 30\n", - " 1.716840e+09\n", - " 0.053656\n", - " (MinMaxScaler(), GenericUnivariateSelect(), Li...\n", - " True\n", - " -0.1615\n", + " 24\n", + " 40\n", + " 1.728326e+09\n", + " 2.829714\n", + " (ExtraTreesClassifier(bootstrap=True, criterio...\n", " False\n", - " {'neg_log_loss': [-0.16943166611189786, -0.171...\n", - " ok\n", - " None\n", - " sklearn.preprocessing._data.MinMaxScaler\n", - " None\n", - " sklearn.feature_selection._univariate_selectio...\n", - " None\n", - " sklearn.discriminant_analysis.LinearDiscrimina...\n", - " None\n", - " \n", - " \n", - " 26\n", - " 13\n", - " 1.716840e+09\n", - " 0.445522\n", - " (MLPClassifier())\n", + " -0.7584\n", " True\n", - " -0.2662\n", - " False\n", - " {'neg_log_loss': [-0.24803474747552914, -0.248...\n", + " {'neg_log_loss': [-0.7271546410115207, -0.7667...\n", " ok\n", " None\n", " None\n", " None\n", " None\n", " None\n", - " 
sklearn.neural_network._multilayer_perceptron....\n", - " None\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", + " {'bootstrap': 'True', 'criterion': 'entropy', ...\n", " \n", " \n", - " 27\n", - " 2\n", - " 1.716840e+09\n", - " 0.532068\n", - " (HistGradientBoostingClassifier())\n", + " 25\n", + " 4\n", + " 1.728326e+09\n", + " 0.028692\n", + " (DecisionTreeClassifier())\n", " True\n", - " -0.3808\n", + " -0.9612\n", " False\n", - " {'neg_log_loss': [-0.4711134475626598, -0.4294...\n", + " {'neg_log_loss': [-2.4029102259411435, -1.2014...\n", " ok\n", " None\n", " None\n", " None\n", " None\n", " None\n", - " sklearn.ensemble.HistGradientBoostingClassifier\n", + " sklearn.tree._classes.DecisionTreeClassifier\n", " None\n", " \n", " \n", - " 28\n", + " 26\n", " 6\n", - " 1.716840e+09\n", - " 0.026399\n", + " 1.728326e+09\n", + " 0.028472\n", " (KNeighborsClassifier())\n", " True\n", - " -0.5414\n", + " -1.0164\n", " False\n", - " {'neg_log_loss': [-0.08555168796095076, -0.024...\n", + " {'neg_log_loss': [-1.2407969888941925, -1.2490...\n", " ok\n", " None\n", " None\n", @@ -1094,29 +1105,29 @@ " None\n", " \n", " \n", - " 29\n", - " 14\n", - " 1.716840e+09\n", - " 0.028472\n", - " (MultinomialNB())\n", + " 27\n", + " 30\n", + " 1.728326e+09\n", + " 0.597963\n", + " (SelectPercentile(), ExtraTreesClassifier())\n", " True\n", - " -0.5686\n", + " -1.0293\n", " False\n", - " {'neg_log_loss': [-0.5637805289461314, -0.5694...\n", + " {'neg_log_loss': [-1.25672938219069, -1.272544...\n", " ok\n", " None\n", " None\n", " None\n", + " sklearn.feature_selection._univariate_selectio...\n", " None\n", - " None\n", - " sklearn.naive_bayes.MultinomialNB\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", " None\n", " \n", " \n", - " 30\n", + " 28\n", " 3\n", - " 1.716840e+09\n", - " 0.031201\n", + " 1.728326e+09\n", + " 0.034009\n", " (BernoulliNB())\n", " True\n", " -1.0986\n", @@ -1132,22 +1143,22 @@ " None\n", " \n", " \n", - " 31\n", - " 4\n", - " 
1.716840e+09\n", - " 0.024085\n", - " (DecisionTreeClassifier())\n", + " 29\n", + " 31\n", + " 1.728326e+09\n", + " 0.650155\n", + " (GenericUnivariateSelect(), ExtraTreesClassifi...\n", " True\n", - " -1.4417\n", + " -1.9644\n", " False\n", - " {'neg_log_loss': [-2.4029102259411435, -4.4408...\n", + " {'neg_log_loss': [-3.6461241045282278, -1.2611...\n", " ok\n", " None\n", " None\n", " None\n", + " sklearn.feature_selection._univariate_selectio...\n", " None\n", - " None\n", - " sklearn.tree._classes.DecisionTreeClassifier\n", + " sklearn.ensemble._forest.ExtraTreesClassifier\n", " None\n", " \n", " \n", @@ -1156,174 +1167,164 @@ ], "text/plain": [ " order time runtime \\\n", - "0 15 1.716840e+09 0.038556 \n", - "1 22 1.716840e+09 0.076023 \n", - "2 20 1.716840e+09 0.037481 \n", - "3 17 1.716840e+09 0.073377 \n", - "4 35 1.716840e+09 0.048369 \n", - "5 40 1.716840e+09 0.037374 \n", - "6 38 1.716840e+09 0.034237 \n", - "7 33 1.716840e+09 0.036004 \n", - "8 31 1.716840e+09 0.048536 \n", - "9 18 1.716840e+09 0.040574 \n", - "10 36 1.716840e+09 0.045721 \n", - "11 34 1.716840e+09 0.046783 \n", - "12 19 1.716840e+09 0.045145 \n", - "13 39 1.716840e+09 0.036426 \n", - "14 37 1.716840e+09 0.036773 \n", - "15 27 1.716840e+09 0.048136 \n", - "16 7 1.716840e+09 0.030830 \n", - "17 21 1.716840e+09 0.040023 \n", - "18 29 1.716840e+09 0.049416 \n", - "19 16 1.716840e+09 0.053158 \n", - "20 0 1.716840e+09 0.393223 \n", - "21 8 1.716840e+09 0.024728 \n", - "22 32 1.716840e+09 0.040617 \n", - "23 1 1.716840e+09 0.523188 \n", - "24 5 1.716840e+09 0.026534 \n", - "25 30 1.716840e+09 0.053656 \n", - "26 13 1.716840e+09 0.445522 \n", - "27 2 1.716840e+09 0.532068 \n", - "28 6 1.716840e+09 0.026399 \n", - "29 14 1.716840e+09 0.028472 \n", - "30 3 1.716840e+09 0.031201 \n", - "31 4 1.716840e+09 0.024085 \n", + "0 0 1.728326e+09 0.522395 \n", + "1 8 1.728326e+09 0.028414 \n", + "2 5 1.728326e+09 0.021561 \n", + "3 7 1.728326e+09 0.030384 \n", + "4 1 1.728326e+09 0.588323 \n", + "5 22 
1.728326e+09 0.506231 \n", + "6 17 1.728326e+09 0.575651 \n", + "7 13 1.728326e+09 0.097281 \n", + "8 29 1.728326e+09 0.660204 \n", + "9 23 1.728326e+09 0.685154 \n", + "10 28 1.728326e+09 0.590059 \n", + "11 32 1.728326e+09 2.375114 \n", + "12 14 1.728326e+09 0.559326 \n", + "13 39 1.728326e+09 3.200782 \n", + "14 41 1.728326e+09 2.047107 \n", + "15 34 1.728326e+09 2.374602 \n", + "16 2 1.728326e+09 0.672299 \n", + "17 33 1.728326e+09 2.957551 \n", + "18 15 1.728326e+09 0.027914 \n", + "19 37 1.728326e+09 1.904881 \n", + "20 24 1.728326e+09 0.744971 \n", + "21 38 1.728326e+09 2.679551 \n", + "22 35 1.728326e+09 1.824337 \n", + "23 36 1.728326e+09 2.628361 \n", + "24 40 1.728326e+09 2.829714 \n", + "25 4 1.728326e+09 0.028692 \n", + "26 6 1.728326e+09 0.028472 \n", + "27 30 1.728326e+09 0.597963 \n", + "28 3 1.728326e+09 0.034009 \n", + "29 31 1.728326e+09 0.650155 \n", "\n", " pipeline default_hp \\\n", - "0 (MinMaxScaler(), LinearDiscriminantAnalysis()) True \n", - "1 (MinMaxScaler(), FastICA(), LinearDiscriminant... True \n", - "2 (StandardScaler(), LinearDiscriminantAnalysis()) True \n", - "3 (PowerTransformer(), LinearDiscriminantAnalysi... True \n", - "4 (MinMaxScaler(), LinearDiscriminantAnalysis(sh... False \n", - "5 (MinMaxScaler(), LinearDiscriminantAnalysis(so... False \n", - "6 (MinMaxScaler(), LinearDiscriminantAnalysis(so... False \n", - "7 (MinMaxScaler(), LinearDiscriminantAnalysis(so... False \n", - "8 (MinMaxScaler(), LinearDiscriminantAnalysis(so... False \n", - "9 (QuantileTransformer(), LinearDiscriminantAnal... True \n", - "10 (MinMaxScaler(), LinearDiscriminantAnalysis(sh... False \n", - "11 (MinMaxScaler(), LinearDiscriminantAnalysis(sh... False \n", - "12 (RobustScaler(), LinearDiscriminantAnalysis()) True \n", - "13 (MinMaxScaler(), LinearDiscriminantAnalysis(sh... False \n", - "14 (MinMaxScaler(), LinearDiscriminantAnalysis(so... False \n", - "15 (MinMaxScaler(), PCA(), LinearDiscriminantAnal... 
True \n", - "16 (LinearDiscriminantAnalysis()) True \n", - "17 (VarianceThreshold(), LinearDiscriminantAnalys... True \n", - "18 (MinMaxScaler(), SelectPercentile(), LinearDis... True \n", - "19 (Normalizer(), LinearDiscriminantAnalysis()) True \n", - "20 (ExtraTreesClassifier()) True \n", - "21 (QuadraticDiscriminantAnalysis()) True \n", - "22 (MinMaxScaler(), LinearDiscriminantAnalysis(so... False \n", - "23 (RandomForestClassifier()) True \n", - "24 (GaussianNB()) True \n", - "25 (MinMaxScaler(), GenericUnivariateSelect(), Li... True \n", - "26 (MLPClassifier()) True \n", - "27 (HistGradientBoostingClassifier()) True \n", - "28 (KNeighborsClassifier()) True \n", - "29 (MultinomialNB()) True \n", - "30 (BernoulliNB()) True \n", - "31 (DecisionTreeClassifier()) True \n", + "0 ((ExtraTreeClassifier(random_state=1959646086)... True \n", + "1 (QuadraticDiscriminantAnalysis()) True \n", + "2 (GaussianNB()) True \n", + "3 (LinearDiscriminantAnalysis()) True \n", + "4 (RandomForestClassifier()) True \n", + "5 (VarianceThreshold(), ExtraTreesClassifier()) True \n", + "6 (Normalizer(), ExtraTreesClassifier()) True \n", + "7 (LogisticRegression()) True \n", + "8 (PolynomialFeatures(), ExtraTreesClassifier()) True \n", + "9 (FastICA(), ExtraTreesClassifier()) True \n", + "10 (PCA(), ExtraTreesClassifier()) True \n", + "11 (ExtraTreesClassifier(max_features=0.725304868... False \n", + "12 (MLPClassifier()) True \n", + "13 (ExtraTreesClassifier(bootstrap=True, criterio... False \n", + "14 (ExtraTreesClassifier(max_features=0.934182480... False \n", + "15 (ExtraTreesClassifier(criterion='entropy', max... False \n", + "16 (HistGradientBoostingClassifier()) True \n", + "17 (ExtraTreesClassifier(bootstrap=True, criterio... False \n", + "18 (MultinomialNB()) True \n", + "19 (ExtraTreesClassifier(max_features=0.187438391... False \n", + "20 (FeatureAgglomeration(), ExtraTreesClassifier()) True \n", + "21 (ExtraTreesClassifier(bootstrap=True, max_feat... 
False \n", + "22 (ExtraTreesClassifier(criterion='entropy', max... False \n", + "23 (ExtraTreesClassifier(bootstrap=True, max_feat... False \n", + "24 (ExtraTreesClassifier(bootstrap=True, criterio... False \n", + "25 (DecisionTreeClassifier()) True \n", + "26 (KNeighborsClassifier()) True \n", + "27 (SelectPercentile(), ExtraTreesClassifier()) True \n", + "28 (BernoulliNB()) True \n", + "29 (GenericUnivariateSelect(), ExtraTreesClassifi... True \n", "\n", " neg_log_loss new_best evaluation_report \\\n", - "0 -0.0256 True {'neg_log_loss': [-0.002495394501050993, -0.02... \n", - "1 -0.0290 False {'neg_log_loss': [-0.021755102475042885, -0.08... \n", - "2 -0.0320 False {'neg_log_loss': [-0.06935021251033252, -0.026... \n", - "3 -0.0403 False {'neg_log_loss': [-0.019349148853344624, -0.07... \n", - "4 -0.0471 True {'neg_log_loss': [-0.04895887982577556, -0.079... \n", - "5 -0.0499 True {'neg_log_loss': [-0.018734488484043823, -0.08... \n", - "6 -0.0503 True {'neg_log_loss': [-0.0765949969130555, -0.0462... \n", - "7 -0.0510 True {'neg_log_loss': [-0.13321562481779245, -0.010... \n", - "8 -0.0512 True {'neg_log_loss': [-0.07254237857271588, -0.027... \n", - "9 -0.0556 False {'neg_log_loss': [-0.11018273067707582, -0.008... \n", - "10 -0.0561 True {'neg_log_loss': [-0.0862133174270862, -0.0457... \n", - "11 -0.0578 True {'neg_log_loss': [-0.07504199300724208, -0.032... \n", - "12 -0.0589 False {'neg_log_loss': [-0.06260639546974095, -0.008... \n", - "13 -0.0644 True {'neg_log_loss': [-0.19533490226858358, -0.055... \n", - "14 -0.0663 True {'neg_log_loss': [-0.044188917458848724, -0.07... \n", - "15 -0.0677 False {'neg_log_loss': [-0.02622884537934961, -0.037... \n", - "16 -0.0741 True {'neg_log_loss': [-0.05479774773701625, -0.086... \n", - "17 -0.0841 False {'neg_log_loss': [-0.09759327975387903, -0.119... \n", - "18 -0.0864 False {'neg_log_loss': [-0.07460611584690269, -0.082... \n", - "19 -0.0957 False {'neg_log_loss': [-0.008705640849406344, -0.01... 
\n", - "20 -0.1086 True {'neg_log_loss': [-0.1480640892431135, -0.0363... \n", - "21 -0.1127 False {'neg_log_loss': [-0.09153325016961104, -0.119... \n", - "22 -0.1186 True {'neg_log_loss': [-0.129130290057952, -0.20282... \n", - "23 -0.1224 False {'neg_log_loss': [-0.17370315489963523, -0.142... \n", - "24 -0.1362 False {'neg_log_loss': [-0.20918447713333907, -0.069... \n", - "25 -0.1615 False {'neg_log_loss': [-0.16943166611189786, -0.171... \n", - "26 -0.2662 False {'neg_log_loss': [-0.24803474747552914, -0.248... \n", - "27 -0.3808 False {'neg_log_loss': [-0.4711134475626598, -0.4294... \n", - "28 -0.5414 False {'neg_log_loss': [-0.08555168796095076, -0.024... \n", - "29 -0.5686 False {'neg_log_loss': [-0.5637805289461314, -0.5694... \n", - "30 -1.0986 False {'neg_log_loss': [-1.09861228866811, -1.098612... \n", - "31 -1.4417 False {'neg_log_loss': [-2.4029102259411435, -4.4408... \n", + "0 -0.0570 True {'neg_log_loss': [-0.035560745675446245, -0.03... \n", + "1 -0.0648 False {'neg_log_loss': [-0.075598137132969, -0.08836... \n", + "2 -0.0763 False {'neg_log_loss': [-0.1416176110952844, -0.0492... \n", + "3 -0.0775 False {'neg_log_loss': [-0.04503314380131569, -0.091... \n", + "4 -0.0821 False {'neg_log_loss': [-0.04251933728837645, -0.189... \n", + "5 -0.0890 False {'neg_log_loss': [-0.07277377026732057, -0.074... \n", + "6 -0.0899 False {'neg_log_loss': [-0.02478927940887918, -0.159... \n", + "7 -0.1103 False {'neg_log_loss': [-0.09439910457891872, -0.131... \n", + "8 -0.1176 False {'neg_log_loss': [-0.14511456353964386, -0.070... \n", + "9 -0.1258 False {'neg_log_loss': [-0.1329229165317684, -0.0874... \n", + "10 -0.1593 False {'neg_log_loss': [-0.18733720031337187, -0.140... \n", + "11 -0.1765 True {'neg_log_loss': [-0.16982338373638153, -0.178... \n", + "12 -0.2347 False {'neg_log_loss': [-0.28821802181927886, -0.214... \n", + "13 -0.2413 True {'neg_log_loss': [-0.22551765074824667, -0.249... 
\n", + "14 -0.2471 True {'neg_log_loss': [-0.21506280777346423, -0.261... \n", + "15 -0.3156 True {'neg_log_loss': [-0.29650689548241016, -0.325... \n", + "16 -0.3245 False {'neg_log_loss': [-0.576429902881904, -0.23101... \n", + "17 -0.4980 True {'neg_log_loss': [-0.4845302776973054, -0.5045... \n", + "18 -0.5671 False {'neg_log_loss': [-0.5706108218760035, -0.5635... \n", + "19 -0.6068 True {'neg_log_loss': [-0.5887696263887042, -0.6157... \n", + "20 -0.6356 False {'neg_log_loss': [-1.3318127497285062, -0.1355... \n", + "21 -0.6397 True {'neg_log_loss': [-0.638336218959347, -0.64710... \n", + "22 -0.6590 True {'neg_log_loss': [-0.6835637540082007, -0.6613... \n", + "23 -0.7553 True {'neg_log_loss': [-0.7583635062267161, -0.7650... \n", + "24 -0.7584 True {'neg_log_loss': [-0.7271546410115207, -0.7667... \n", + "25 -0.9612 False {'neg_log_loss': [-2.4029102259411435, -1.2014... \n", + "26 -1.0164 False {'neg_log_loss': [-1.2407969888941925, -1.2490... \n", + "27 -1.0293 False {'neg_log_loss': [-1.25672938219069, -1.272544... \n", + "28 -1.0986 False {'neg_log_loss': [-1.09861228866811, -1.098612... \n", + "29 -1.9644 False {'neg_log_loss': [-3.6461241045282278, -1.2611... 
\n", "\n", " status exception data-pre-processor_class \\\n", - "0 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "1 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "2 ok None sklearn.preprocessing._data.StandardScaler \n", - "3 ok None sklearn.preprocessing._data.PowerTransformer \n", - "4 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "5 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "6 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "7 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "8 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "9 ok None sklearn.preprocessing._data.QuantileTransformer \n", - "10 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "11 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "12 ok None sklearn.preprocessing._data.RobustScaler \n", - "13 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "14 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "15 ok None sklearn.preprocessing._data.MinMaxScaler \n", + "0 ok None None \n", + "1 ok None None \n", + "2 ok None None \n", + "3 ok None None \n", + "4 ok None None \n", + "5 ok None sklearn.feature_selection._variance_threshold.... \n", + "6 ok None sklearn.preprocessing._data.Normalizer \n", + "7 ok None None \n", + "8 ok None None \n", + "9 ok None None \n", + "10 ok None None \n", + "11 ok None None \n", + "12 ok None None \n", + "13 ok None None \n", + "14 ok None None \n", + "15 ok None None \n", "16 ok None None \n", - "17 ok None sklearn.feature_selection._variance_threshold.... 
\n", - "18 ok None sklearn.preprocessing._data.MinMaxScaler \n", - "19 ok None sklearn.preprocessing._data.Normalizer \n", + "17 ok None None \n", + "18 ok None None \n", + "19 ok None None \n", "20 ok None None \n", "21 ok None None \n", - "22 ok None sklearn.preprocessing._data.MinMaxScaler \n", + "22 ok None None \n", "23 ok None None \n", "24 ok None None \n", - "25 ok None sklearn.preprocessing._data.MinMaxScaler \n", + "25 ok None None \n", "26 ok None None \n", "27 ok None None \n", "28 ok None None \n", "29 ok None None \n", - "30 ok None None \n", - "31 ok None None \n", "\n", " data-pre-processor_hps feature-pre-processor_class \\\n", "0 None None \n", - "1 None sklearn.decomposition._fastica.FastICA \n", + "1 None None \n", "2 None None \n", "3 None None \n", "4 None None \n", "5 None None \n", "6 None None \n", "7 None None \n", - "8 None None \n", - "9 None None \n", - "10 None None \n", + "8 None sklearn.preprocessing._polynomial.PolynomialFe... \n", + "9 None sklearn.decomposition._fastica.FastICA \n", + "10 None sklearn.decomposition._pca.PCA \n", "11 None None \n", "12 None None \n", "13 None None \n", "14 None None \n", - "15 None sklearn.decomposition._pca.PCA \n", + "15 None None \n", "16 None None \n", "17 None None \n", - "18 None sklearn.feature_selection._univariate_selectio... \n", + "18 None None \n", "19 None None \n", - "20 None None \n", + "20 None sklearn.cluster._agglomerative.FeatureAgglomer... \n", "21 None None \n", "22 None None \n", "23 None None \n", "24 None None \n", - "25 None sklearn.feature_selection._univariate_selectio... \n", + "25 None None \n", "26 None None \n", - "27 None None \n", + "27 None sklearn.feature_selection._univariate_selectio... \n", "28 None None \n", - "29 None None \n", - "30 None None \n", - "31 None None \n", + "29 None sklearn.feature_selection._univariate_selectio... 
\n", "\n", " feature-pre-processor_hps \\\n", "0 None \n", @@ -1356,76 +1357,70 @@ "27 None \n", "28 None \n", "29 None \n", - "30 None \n", - "31 None \n", "\n", " learner_class \\\n", - "0 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "1 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "2 sklearn.discriminant_analysis.LinearDiscrimina... \n", + "0 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "1 sklearn.discriminant_analysis.QuadraticDiscrim... \n", + "2 sklearn.naive_bayes.GaussianNB \n", "3 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "4 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "5 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "6 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "7 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "8 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "9 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "10 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "11 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "12 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "13 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "14 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "15 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "16 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "17 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "18 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "19 sklearn.discriminant_analysis.LinearDiscrimina... 
\n", + "4 sklearn.ensemble._forest.RandomForestClassifier \n", + "5 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "6 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "7 sklearn.linear_model.LogisticRegression \n", + "8 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "9 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "10 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "11 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "12 sklearn.neural_network._multilayer_perceptron.... \n", + "13 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "14 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "15 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "16 sklearn.ensemble.HistGradientBoostingClassifier \n", + "17 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "18 sklearn.naive_bayes.MultinomialNB \n", + "19 sklearn.ensemble._forest.ExtraTreesClassifier \n", "20 sklearn.ensemble._forest.ExtraTreesClassifier \n", - "21 sklearn.discriminant_analysis.QuadraticDiscrim... \n", - "22 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "23 sklearn.ensemble._forest.RandomForestClassifier \n", - "24 sklearn.naive_bayes.GaussianNB \n", - "25 sklearn.discriminant_analysis.LinearDiscrimina... \n", - "26 sklearn.neural_network._multilayer_perceptron.... \n", - "27 sklearn.ensemble.HistGradientBoostingClassifier \n", - "28 sklearn.neighbors._classification.KNeighborsCl... \n", - "29 sklearn.naive_bayes.MultinomialNB \n", - "30 sklearn.naive_bayes.BernoulliNB \n", - "31 sklearn.tree._classes.DecisionTreeClassifier \n", + "21 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "22 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "23 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "24 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "25 sklearn.tree._classes.DecisionTreeClassifier \n", + "26 sklearn.neighbors._classification.KNeighborsCl... 
\n", + "27 sklearn.ensemble._forest.ExtraTreesClassifier \n", + "28 sklearn.naive_bayes.BernoulliNB \n", + "29 sklearn.ensemble._forest.ExtraTreesClassifier \n", "\n", " learner_hps \n", "0 None \n", "1 None \n", "2 None \n", "3 None \n", - "4 {'shrinkage': 'auto', 'tol': 1.917751462463796... \n", - "5 {'shrinkage': 'None', 'tol': 0.0871508939358007} \n", - "6 {'shrinkage': 'None', 'tol': 0.000159870783723... \n", - "7 {'shrinkage': 'None', 'tol': 0.000478226112929... \n", - "8 {'shrinkage': 'None', 'tol': 1.735826848763034... \n", + "4 None \n", + "5 None \n", + "6 None \n", + "7 None \n", + "8 None \n", "9 None \n", - "10 {'shrinkage': 'auto', 'tol': 9.083819419770749... \n", - "11 {'shrinkage': 'auto', 'tol': 0.000390299911886... \n", + "10 None \n", + "11 {'bootstrap': 'False', 'criterion': 'gini', 'm... \n", "12 None \n", - "13 {'shrinkage': 'manual', 'tol': 0.0013352412850... \n", - "14 {'shrinkage': 'None', 'tol': 0.04619198175661532} \n", - "15 None \n", + "13 {'bootstrap': 'True', 'criterion': 'entropy', ... \n", + "14 {'bootstrap': 'False', 'criterion': 'gini', 'm... \n", + "15 {'bootstrap': 'False', 'criterion': 'entropy',... \n", "16 None \n", - "17 None \n", + "17 {'bootstrap': 'True', 'criterion': 'entropy', ... \n", "18 None \n", - "19 None \n", + "19 {'bootstrap': 'False', 'criterion': 'gini', 'm... \n", "20 None \n", - "21 None \n", - "22 {'shrinkage': 'None', 'tol': 0.011865399901662... \n", - "23 None \n", - "24 None \n", + "21 {'bootstrap': 'True', 'criterion': 'gini', 'ma... \n", + "22 {'bootstrap': 'False', 'criterion': 'entropy',... \n", + "23 {'bootstrap': 'True', 'criterion': 'gini', 'ma... \n", + "24 {'bootstrap': 'True', 'criterion': 'entropy', ... \n", "25 None \n", "26 None \n", "27 None \n", "28 None \n", - "29 None \n", - "30 None \n", - "31 None " + "29 None " ] }, "execution_count": 4, @@ -1445,7 +1440,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -1461,7 +1456,7 @@ " scoring = naml.task.scoring[\"name\"]\n", " \n", " fig, ax = plt.subplots(figsize=(20, 4))\n", - " ax.plot(naml.history[\"time\"], naml.history[scoring])\n", + " ax.step(naml.history[\"time\"], naml.history[scoring])\n", " ax.axhline(naml.history[scoring].max(), linestyle=\"--\", color=\"black\", linewidth=1)\n", " max_val = naml.history[scoring].max()\n", " median_val = naml.history[scoring].median()\n", @@ -1855,7 +1850,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [00:15<00:00, 2.04it/s]\n" + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:21<00:00, 1.49it/s]\n" ] }, { @@ -1869,7 +1864,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:43<00:00, 4.35s/it]\n" + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:45<00:00, 4.52s/it]\n" ] } ], @@ -1905,7 +1900,6 @@ " ['Abbrev', 'Locality',\n", " 'Map_Ref', 'Latitude',\n", " 'Sp'])])),\n", - " ('data-pre-processor', VarianceThreshold()),\n", " ('feature-pre-processor', PolynomialFeatures()),\n", " ('learner', RandomForestClassifier())])\n" ] @@ -1965,17 +1959,17 @@ " \n", " \n", " 0\n", - " 28\n", - " 1.716840e+09\n", - " 2.947976\n", + " 29\n", + " 1.728326e+09\n", + " 2.947797\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -0.7442\n", + " -0.7627\n", " True\n", - " {'neg_log_loss': [-0.7438252244500024, -0.8369...\n", + " {'neg_log_loss': [-0.7863294976455495, 
-0.7634...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", " None\n", @@ -1984,17 +1978,17 @@ " \n", " \n", " 1\n", - " 21\n", - " 1.716840e+09\n", - " 1.659287\n", + " 1\n", + " 1.728326e+09\n", + " 2.122452\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -0.7972\n", + " -0.8041\n", " True\n", - " {'neg_log_loss': [-0.8040130707520944, -0.8367...\n", + " {'neg_log_loss': [-0.7672424451231477, -0.7532...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " None\n", " None\n", @@ -2003,17 +1997,17 @@ " \n", " \n", " 2\n", - " 1\n", - " 1.716840e+09\n", - " 1.398526\n", + " 22\n", + " 1.728326e+09\n", + " 1.680269\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -0.8218\n", - " True\n", - " {'neg_log_loss': [-0.7985448420232011, -0.8385...\n", + " -0.8271\n", + " False\n", + " {'neg_log_loss': [-0.8780849948595296, -0.8649...\n", " ok\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " None\n", " None\n", @@ -2022,14 +2016,14 @@ " \n", " \n", " 3\n", - " 16\n", - " 1.716840e+09\n", - " 1.897339\n", + " 17\n", + " 1.728326e+09\n", + " 1.965827\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -0.8747\n", + " -0.8628\n", " False\n", - " {'neg_log_loss': [-0.890343561760756, -0.92738...\n", + " {'neg_log_loss': [-0.8932091820019521, -0.8652...\n", " ok\n", " None\n", " sklearn.preprocessing._data.Normalizer\n", @@ -2041,14 +2035,33 @@ " \n", " \n", " 4\n", + " 34\n", + " 1.728326e+09\n", + " 5.485827\n", + " (ColumnTransformer(transformers=[('num',\\n ...\n", + " False\n", + " -0.9799\n", + " True\n", + " {'neg_log_loss': [-0.9702446225658047, -1.0076...\n", + " ok\n", + " None\n", + " None\n", + " None\n", + " sklearn.preprocessing._polynomial.PolynomialFe...\n", + " {'degree': 2, 
'include_bias': 'True', 'interac...\n", + " sklearn.ensemble._forest.RandomForestClassifier\n", + " {'bootstrap': 'False', 'criterion': 'gini', 'm...\n", + " \n", + " \n", + " 5\n", " 0\n", - " 1.716840e+09\n", - " 1.420007\n", + " 1.728326e+09\n", + " 3.453922\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -0.8969\n", + " -1.0150\n", " True\n", - " {'neg_log_loss': [-0.9249840584167478, -0.8883...\n", + " {'neg_log_loss': [-0.8030745978803508, -1.1351...\n", " ok\n", " None\n", " None\n", @@ -2059,113 +2072,132 @@ " None\n", " \n", " \n", - " 5\n", - " 29\n", - " 1.716840e+09\n", - " 1.536983\n", + " 6\n", + " 28\n", + " 1.728326e+09\n", + " 3.102600\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -1.1550\n", + " -1.0335\n", " False\n", - " {'neg_log_loss': [-1.1235866063406887, -1.1675...\n", + " {'neg_log_loss': [-1.001467758467775, -1.04278...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", " None\n", - " sklearn.feature_selection._univariate_selectio...\n", + " None\n", + " sklearn.decomposition._pca.PCA\n", " None\n", " sklearn.ensemble._forest.RandomForestClassifier\n", " None\n", " \n", " \n", - " 6\n", - " 32\n", - " 1.716840e+09\n", - " 4.452793\n", + " 7\n", + " 30\n", + " 1.728326e+09\n", + " 1.492167\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", - " False\n", - " -1.1597\n", " True\n", - " {'neg_log_loss': [-1.1651899439196283, -1.1581...\n", + " -1.1590\n", + " False\n", + " {'neg_log_loss': [-0.8879810952815211, -1.1439...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", " None\n", - " sklearn.preprocessing._polynomial.PolynomialFe...\n", - " {'degree': 2, 'include_bias': 'True', 'interac...\n", + " None\n", + " sklearn.feature_selection._univariate_selectio...\n", + " None\n", " sklearn.ensemble._forest.RandomForestClassifier\n", - " {'bootstrap': 'False', 'criterion': 'entropy',...\n", + " None\n", " \n", " \n", - " 
7\n", - " 37\n", - " 1.716840e+09\n", - " 3.501099\n", + " 8\n", + " 40\n", + " 1.728326e+09\n", + " 5.996469\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " False\n", - " -1.2859\n", + " -1.2337\n", " True\n", - " {'neg_log_loss': [-1.2933575482942903, -1.2889...\n", + " {'neg_log_loss': [-1.2702149477323301, -1.2421...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", - " {'degree': 2, 'include_bias': 'False', 'intera...\n", + " {'degree': 3, 'include_bias': 'False', 'intera...\n", " sklearn.ensemble._forest.RandomForestClassifier\n", " {'bootstrap': 'False', 'criterion': 'gini', 'm...\n", " \n", " \n", - " 8\n", - " 38\n", - " 1.716840e+09\n", - " 4.022135\n", + " 9\n", + " 13\n", + " 1.728326e+09\n", + " 0.557217\n", + " (ColumnTransformer(transformers=[('num',\\n ...\n", + " True\n", + " -1.4292\n", + " False\n", + " {'neg_log_loss': [-1.463201571062134, -1.42270...\n", + " ok\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", + " sklearn.linear_model.LogisticRegression\n", + " None\n", + " \n", + " \n", + " 10\n", + " 32\n", + " 1.728326e+09\n", + " 4.584987\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " False\n", - " -1.2873\n", + " -1.4742\n", " True\n", - " {'neg_log_loss': [-1.3000910389764708, -1.2896...\n", + " {'neg_log_loss': [-1.4736780469349962, -1.4800...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", - " {'degree': 2, 'include_bias': 'False', 'intera...\n", + " {'degree': 3, 'include_bias': 'True', 'interac...\n", " sklearn.ensemble._forest.RandomForestClassifier\n", - " {'bootstrap': 'True', 'criterion': 'gini', 'ma...\n", + " {'bootstrap': 'False', 'criterion': 'entropy',...\n", " \n", " \n", - " 9\n", - " 35\n", - " 1.716840e+09\n", - " 6.834368\n", + " 11\n", + " 39\n", + " 
1.728326e+09\n", + " 5.738648\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " False\n", - " -1.2885\n", + " -1.4775\n", " True\n", - " {'neg_log_loss': [-1.2962518843477562, -1.2758...\n", + " {'neg_log_loss': [-1.4825927688051026, -1.4843...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", - " {'degree': 3, 'include_bias': 'True', 'interac...\n", + " {'degree': 3, 'include_bias': 'False', 'intera...\n", " sklearn.ensemble._forest.RandomForestClassifier\n", - " {'bootstrap': 'True', 'criterion': 'gini', 'ma...\n", + " {'bootstrap': 'True', 'criterion': 'entropy', ...\n", " \n", " \n", - " 10\n", - " 30\n", - " 1.716840e+09\n", - " 1.111234\n", + " 12\n", + " 31\n", + " 1.728326e+09\n", + " 1.017316\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -1.3710\n", + " -1.4902\n", " False\n", - " {'neg_log_loss': [-1.2476361495435986, -1.5338...\n", + " {'neg_log_loss': [-1.24194032091799, -2.034358...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.feature_selection._univariate_selectio...\n", " None\n", @@ -2173,18 +2205,18 @@ " None\n", " \n", " \n", - " 11\n", - " 34\n", - " 1.716840e+09\n", - " 3.769047\n", + " 13\n", + " 41\n", + " 1.728326e+09\n", + " 2.525508\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " False\n", - " -1.4911\n", + " -1.4922\n", " True\n", - " {'neg_log_loss': [-1.4906373918813967, -1.4923...\n", + " {'neg_log_loss': [-1.4849048424317384, -1.4926...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", " {'degree': 2, 'include_bias': 'True', 'interac...\n", @@ -2192,110 +2224,110 @@ " {'bootstrap': 'False', 'criterion': 'gini', 'm...\n", " \n", " \n", - " 12\n", - " 39\n", - " 1.716840e+09\n", - " 3.626931\n", + " 
14\n", + " 33\n", + " 1.728326e+09\n", + " 6.054881\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " False\n", - " -1.4972\n", + " -1.4964\n", " True\n", - " {'neg_log_loss': [-1.5050712278438176, -1.4960...\n", + " {'neg_log_loss': [-1.4927437353331847, -1.5011...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", - " {'degree': 2, 'include_bias': 'False', 'intera...\n", + " {'degree': 3, 'include_bias': 'False', 'intera...\n", " sklearn.ensemble._forest.RandomForestClassifier\n", " {'bootstrap': 'True', 'criterion': 'entropy', ...\n", " \n", " \n", - " 13\n", - " 33\n", - " 1.716840e+09\n", - " 3.888649\n", + " 15\n", + " 38\n", + " 1.728326e+09\n", + " 2.480738\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " False\n", - " -1.5044\n", + " -1.4981\n", " True\n", - " {'neg_log_loss': [-1.5006917142343803, -1.5077...\n", + " {'neg_log_loss': [-1.5005412486743204, -1.5085...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", " {'degree': 2, 'include_bias': 'False', 'intera...\n", " sklearn.ensemble._forest.RandomForestClassifier\n", - " {'bootstrap': 'False', 'criterion': 'gini', 'm...\n", + " {'bootstrap': 'False', 'criterion': 'entropy',...\n", " \n", " \n", - " 14\n", - " 31\n", - " 1.716840e+09\n", - " 4.032881\n", + " 16\n", + " 37\n", + " 1.728326e+09\n", + " 2.558972\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " False\n", - " -1.5191\n", + " -1.5052\n", " True\n", - " {'neg_log_loss': [-1.5181759245255042, -1.5255...\n", + " {'neg_log_loss': [-1.4955942790677381, -1.5071...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", - " {'degree': 2, 'include_bias': 'False', 'intera...\n", + " {'degree': 
2, 'include_bias': 'True', 'interac...\n", " sklearn.ensemble._forest.RandomForestClassifier\n", - " {'bootstrap': 'True', 'criterion': 'gini', 'ma...\n", + " {'bootstrap': 'False', 'criterion': 'entropy',...\n", " \n", " \n", - " 15\n", - " 40\n", - " 1.716840e+09\n", - " 4.946885\n", + " 17\n", + " 36\n", + " 1.728326e+09\n", + " 2.910127\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " False\n", - " -1.5267\n", + " -1.5055\n", " True\n", - " {'neg_log_loss': [-1.5283438823816966, -1.5269...\n", + " {'neg_log_loss': [-1.5096610839264617, -1.5155...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", - " {'degree': 3, 'include_bias': 'True', 'interac...\n", + " {'degree': 2, 'include_bias': 'False', 'intera...\n", " sklearn.ensemble._forest.RandomForestClassifier\n", - " {'bootstrap': 'False', 'criterion': 'gini', 'm...\n", + " {'bootstrap': 'False', 'criterion': 'entropy',...\n", " \n", " \n", - " 16\n", - " 36\n", - " 1.716840e+09\n", - " 4.399114\n", + " 18\n", + " 35\n", + " 1.728326e+09\n", + " 6.830578\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " False\n", - " -1.5345\n", + " -1.5217\n", " True\n", - " {'neg_log_loss': [-1.5307666763815162, -1.5358...\n", + " {'neg_log_loss': [-1.5208482591867625, -1.5264...\n", " ok\n", " None\n", - " sklearn.feature_selection._variance_threshold....\n", + " None\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", " {'degree': 3, 'include_bias': 'True', 'interac...\n", " sklearn.ensemble._forest.RandomForestClassifier\n", - " {'bootstrap': 'False', 'criterion': 'entropy',...\n", + " {'bootstrap': 'False', 'criterion': 'gini', 'm...\n", " \n", " \n", - " 17\n", + " 19\n", " 3\n", - " 1.716840e+09\n", - " 0.160419\n", + " 1.728326e+09\n", + " 0.209384\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -1.8132\n", + " -1.8294\n", " False\n", - " 
{'neg_log_loss': [-2.055237104076319, -1.85837...\n", + " {'neg_log_loss': [-1.7426325295846308, -1.4747...\n", " ok\n", " None\n", " None\n", @@ -2306,15 +2338,15 @@ " None\n", " \n", " \n", - " 18\n", - " 13\n", - " 1.716840e+09\n", - " 1.385043\n", + " 20\n", + " 14\n", + " 1.728326e+09\n", + " 1.522974\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -3.3543\n", + " -2.4741\n", " False\n", - " {'neg_log_loss': [-2.7523470066860254, -2.4712...\n", + " {'neg_log_loss': [-2.303620361587193, -2.69780...\n", " ok\n", " None\n", " None\n", @@ -2325,15 +2357,15 @@ " None\n", " \n", " \n", - " 19\n", + " 21\n", " 6\n", - " 1.716840e+09\n", - " 0.278272\n", + " 1.728326e+09\n", + " 0.190030\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -6.4282\n", + " -6.8842\n", " False\n", - " {'neg_log_loss': [-6.3949843461963, -7.0119255...\n", + " {'neg_log_loss': [-8.551792881488588, -6.36601...\n", " ok\n", " None\n", " None\n", @@ -2344,15 +2376,15 @@ " None\n", " \n", " \n", - " 20\n", + " 22\n", " 4\n", - " 1.716840e+09\n", - " 0.178737\n", + " 1.728326e+09\n", + " 0.227942\n", " (ColumnTransformer(transformers=[('num',\\n ...\n", " True\n", - " -14.3200\n", + " -13.9791\n", " False\n", - " {'neg_log_loss': [-11.689833531605561, -14.855...\n", + " {'neg_log_loss': [-13.881677318781607, -14.612...\n", " ok\n", " None\n", " None\n", @@ -2368,154 +2400,166 @@ ], "text/plain": [ " order time runtime \\\n", - "0 28 1.716840e+09 2.947976 \n", - "1 21 1.716840e+09 1.659287 \n", - "2 1 1.716840e+09 1.398526 \n", - "3 16 1.716840e+09 1.897339 \n", - "4 0 1.716840e+09 1.420007 \n", - "5 29 1.716840e+09 1.536983 \n", - "6 32 1.716840e+09 4.452793 \n", - "7 37 1.716840e+09 3.501099 \n", - "8 38 1.716840e+09 4.022135 \n", - "9 35 1.716840e+09 6.834368 \n", - "10 30 1.716840e+09 1.111234 \n", - "11 34 1.716840e+09 3.769047 \n", - "12 39 1.716840e+09 3.626931 \n", - "13 33 1.716840e+09 3.888649 \n", - "14 31 1.716840e+09 4.032881 \n", - "15 
40 1.716840e+09 4.946885 \n", - "16 36 1.716840e+09 4.399114 \n", - "17 3 1.716840e+09 0.160419 \n", - "18 13 1.716840e+09 1.385043 \n", - "19 6 1.716840e+09 0.278272 \n", - "20 4 1.716840e+09 0.178737 \n", + "0 29 1.728326e+09 2.947797 \n", + "1 1 1.728326e+09 2.122452 \n", + "2 22 1.728326e+09 1.680269 \n", + "3 17 1.728326e+09 1.965827 \n", + "4 34 1.728326e+09 5.485827 \n", + "5 0 1.728326e+09 3.453922 \n", + "6 28 1.728326e+09 3.102600 \n", + "7 30 1.728326e+09 1.492167 \n", + "8 40 1.728326e+09 5.996469 \n", + "9 13 1.728326e+09 0.557217 \n", + "10 32 1.728326e+09 4.584987 \n", + "11 39 1.728326e+09 5.738648 \n", + "12 31 1.728326e+09 1.017316 \n", + "13 41 1.728326e+09 2.525508 \n", + "14 33 1.728326e+09 6.054881 \n", + "15 38 1.728326e+09 2.480738 \n", + "16 37 1.728326e+09 2.558972 \n", + "17 36 1.728326e+09 2.910127 \n", + "18 35 1.728326e+09 6.830578 \n", + "19 3 1.728326e+09 0.209384 \n", + "20 14 1.728326e+09 1.522974 \n", + "21 6 1.728326e+09 0.190030 \n", + "22 4 1.728326e+09 0.227942 \n", "\n", " pipeline default_hp \\\n", "0 (ColumnTransformer(transformers=[('num',\\n ... True \n", "1 (ColumnTransformer(transformers=[('num',\\n ... True \n", "2 (ColumnTransformer(transformers=[('num',\\n ... True \n", "3 (ColumnTransformer(transformers=[('num',\\n ... True \n", - "4 (ColumnTransformer(transformers=[('num',\\n ... True \n", + "4 (ColumnTransformer(transformers=[('num',\\n ... False \n", "5 (ColumnTransformer(transformers=[('num',\\n ... True \n", - "6 (ColumnTransformer(transformers=[('num',\\n ... False \n", - "7 (ColumnTransformer(transformers=[('num',\\n ... False \n", + "6 (ColumnTransformer(transformers=[('num',\\n ... True \n", + "7 (ColumnTransformer(transformers=[('num',\\n ... True \n", "8 (ColumnTransformer(transformers=[('num',\\n ... False \n", - "9 (ColumnTransformer(transformers=[('num',\\n ... False \n", - "10 (ColumnTransformer(transformers=[('num',\\n ... True \n", + "9 (ColumnTransformer(transformers=[('num',\\n ... 
True \n", + "10 (ColumnTransformer(transformers=[('num',\\n ... False \n", "11 (ColumnTransformer(transformers=[('num',\\n ... False \n", - "12 (ColumnTransformer(transformers=[('num',\\n ... False \n", + "12 (ColumnTransformer(transformers=[('num',\\n ... True \n", "13 (ColumnTransformer(transformers=[('num',\\n ... False \n", "14 (ColumnTransformer(transformers=[('num',\\n ... False \n", "15 (ColumnTransformer(transformers=[('num',\\n ... False \n", "16 (ColumnTransformer(transformers=[('num',\\n ... False \n", - "17 (ColumnTransformer(transformers=[('num',\\n ... True \n", - "18 (ColumnTransformer(transformers=[('num',\\n ... True \n", + "17 (ColumnTransformer(transformers=[('num',\\n ... False \n", + "18 (ColumnTransformer(transformers=[('num',\\n ... False \n", "19 (ColumnTransformer(transformers=[('num',\\n ... True \n", "20 (ColumnTransformer(transformers=[('num',\\n ... True \n", + "21 (ColumnTransformer(transformers=[('num',\\n ... True \n", + "22 (ColumnTransformer(transformers=[('num',\\n ... True \n", "\n", " neg_log_loss new_best evaluation_report \\\n", - "0 -0.7442 True {'neg_log_loss': [-0.7438252244500024, -0.8369... \n", - "1 -0.7972 True {'neg_log_loss': [-0.8040130707520944, -0.8367... \n", - "2 -0.8218 True {'neg_log_loss': [-0.7985448420232011, -0.8385... \n", - "3 -0.8747 False {'neg_log_loss': [-0.890343561760756, -0.92738... \n", - "4 -0.8969 True {'neg_log_loss': [-0.9249840584167478, -0.8883... \n", - "5 -1.1550 False {'neg_log_loss': [-1.1235866063406887, -1.1675... \n", - "6 -1.1597 True {'neg_log_loss': [-1.1651899439196283, -1.1581... \n", - "7 -1.2859 True {'neg_log_loss': [-1.2933575482942903, -1.2889... \n", - "8 -1.2873 True {'neg_log_loss': [-1.3000910389764708, -1.2896... \n", - "9 -1.2885 True {'neg_log_loss': [-1.2962518843477562, -1.2758... \n", - "10 -1.3710 False {'neg_log_loss': [-1.2476361495435986, -1.5338... \n", - "11 -1.4911 True {'neg_log_loss': [-1.4906373918813967, -1.4923... 
\n", - "12 -1.4972 True {'neg_log_loss': [-1.5050712278438176, -1.4960... \n", - "13 -1.5044 True {'neg_log_loss': [-1.5006917142343803, -1.5077... \n", - "14 -1.5191 True {'neg_log_loss': [-1.5181759245255042, -1.5255... \n", - "15 -1.5267 True {'neg_log_loss': [-1.5283438823816966, -1.5269... \n", - "16 -1.5345 True {'neg_log_loss': [-1.5307666763815162, -1.5358... \n", - "17 -1.8132 False {'neg_log_loss': [-2.055237104076319, -1.85837... \n", - "18 -3.3543 False {'neg_log_loss': [-2.7523470066860254, -2.4712... \n", - "19 -6.4282 False {'neg_log_loss': [-6.3949843461963, -7.0119255... \n", - "20 -14.3200 False {'neg_log_loss': [-11.689833531605561, -14.855... \n", + "0 -0.7627 True {'neg_log_loss': [-0.7863294976455495, -0.7634... \n", + "1 -0.8041 True {'neg_log_loss': [-0.7672424451231477, -0.7532... \n", + "2 -0.8271 False {'neg_log_loss': [-0.8780849948595296, -0.8649... \n", + "3 -0.8628 False {'neg_log_loss': [-0.8932091820019521, -0.8652... \n", + "4 -0.9799 True {'neg_log_loss': [-0.9702446225658047, -1.0076... \n", + "5 -1.0150 True {'neg_log_loss': [-0.8030745978803508, -1.1351... \n", + "6 -1.0335 False {'neg_log_loss': [-1.001467758467775, -1.04278... \n", + "7 -1.1590 False {'neg_log_loss': [-0.8879810952815211, -1.1439... \n", + "8 -1.2337 True {'neg_log_loss': [-1.2702149477323301, -1.2421... \n", + "9 -1.4292 False {'neg_log_loss': [-1.463201571062134, -1.42270... \n", + "10 -1.4742 True {'neg_log_loss': [-1.4736780469349962, -1.4800... \n", + "11 -1.4775 True {'neg_log_loss': [-1.4825927688051026, -1.4843... \n", + "12 -1.4902 False {'neg_log_loss': [-1.24194032091799, -2.034358... \n", + "13 -1.4922 True {'neg_log_loss': [-1.4849048424317384, -1.4926... \n", + "14 -1.4964 True {'neg_log_loss': [-1.4927437353331847, -1.5011... \n", + "15 -1.4981 True {'neg_log_loss': [-1.5005412486743204, -1.5085... \n", + "16 -1.5052 True {'neg_log_loss': [-1.4955942790677381, -1.5071... \n", + "17 -1.5055 True {'neg_log_loss': [-1.5096610839264617, -1.5155... 
\n", + "18 -1.5217 True {'neg_log_loss': [-1.5208482591867625, -1.5264... \n", + "19 -1.8294 False {'neg_log_loss': [-1.7426325295846308, -1.4747... \n", + "20 -2.4741 False {'neg_log_loss': [-2.303620361587193, -2.69780... \n", + "21 -6.8842 False {'neg_log_loss': [-8.551792881488588, -6.36601... \n", + "22 -13.9791 False {'neg_log_loss': [-13.881677318781607, -14.612... \n", "\n", " status exception data-pre-processor_class \\\n", - "0 ok None sklearn.feature_selection._variance_threshold.... \n", - "1 ok None sklearn.feature_selection._variance_threshold.... \n", - "2 ok None None \n", + "0 ok None None \n", + "1 ok None None \n", + "2 ok None sklearn.feature_selection._variance_threshold.... \n", "3 ok None sklearn.preprocessing._data.Normalizer \n", "4 ok None None \n", - "5 ok None sklearn.feature_selection._variance_threshold.... \n", - "6 ok None sklearn.feature_selection._variance_threshold.... \n", - "7 ok None sklearn.feature_selection._variance_threshold.... \n", - "8 ok None sklearn.feature_selection._variance_threshold.... \n", - "9 ok None sklearn.feature_selection._variance_threshold.... \n", - "10 ok None sklearn.feature_selection._variance_threshold.... \n", - "11 ok None sklearn.feature_selection._variance_threshold.... \n", - "12 ok None sklearn.feature_selection._variance_threshold.... \n", - "13 ok None sklearn.feature_selection._variance_threshold.... \n", - "14 ok None sklearn.feature_selection._variance_threshold.... \n", - "15 ok None sklearn.feature_selection._variance_threshold.... \n", - "16 ok None sklearn.feature_selection._variance_threshold.... 
\n", + "5 ok None None \n", + "6 ok None None \n", + "7 ok None None \n", + "8 ok None None \n", + "9 ok None None \n", + "10 ok None None \n", + "11 ok None None \n", + "12 ok None None \n", + "13 ok None None \n", + "14 ok None None \n", + "15 ok None None \n", + "16 ok None None \n", "17 ok None None \n", "18 ok None None \n", "19 ok None None \n", "20 ok None None \n", + "21 ok None None \n", + "22 ok None None \n", "\n", " data-pre-processor_hps feature-pre-processor_class \\\n", "0 None sklearn.preprocessing._polynomial.PolynomialFe... \n", "1 None None \n", "2 None None \n", "3 None None \n", - "4 None None \n", - "5 None sklearn.feature_selection._univariate_selectio... \n", - "6 None sklearn.preprocessing._polynomial.PolynomialFe... \n", - "7 None sklearn.preprocessing._polynomial.PolynomialFe... \n", + "4 None sklearn.preprocessing._polynomial.PolynomialFe... \n", + "5 None None \n", + "6 None sklearn.decomposition._pca.PCA \n", + "7 None sklearn.feature_selection._univariate_selectio... \n", "8 None sklearn.preprocessing._polynomial.PolynomialFe... \n", - "9 None sklearn.preprocessing._polynomial.PolynomialFe... \n", - "10 None sklearn.feature_selection._univariate_selectio... \n", + "9 None None \n", + "10 None sklearn.preprocessing._polynomial.PolynomialFe... \n", "11 None sklearn.preprocessing._polynomial.PolynomialFe... \n", - "12 None sklearn.preprocessing._polynomial.PolynomialFe... \n", + "12 None sklearn.feature_selection._univariate_selectio... \n", "13 None sklearn.preprocessing._polynomial.PolynomialFe... \n", "14 None sklearn.preprocessing._polynomial.PolynomialFe... \n", "15 None sklearn.preprocessing._polynomial.PolynomialFe... \n", "16 None sklearn.preprocessing._polynomial.PolynomialFe... \n", - "17 None None \n", - "18 None None \n", + "17 None sklearn.preprocessing._polynomial.PolynomialFe... \n", + "18 None sklearn.preprocessing._polynomial.PolynomialFe... 
\n", "19 None None \n", "20 None None \n", + "21 None None \n", + "22 None None \n", "\n", " feature-pre-processor_hps \\\n", "0 None \n", "1 None \n", "2 None \n", "3 None \n", - "4 None \n", + "4 {'degree': 2, 'include_bias': 'True', 'interac... \n", "5 None \n", - "6 {'degree': 2, 'include_bias': 'True', 'interac... \n", - "7 {'degree': 2, 'include_bias': 'False', 'intera... \n", - "8 {'degree': 2, 'include_bias': 'False', 'intera... \n", - "9 {'degree': 3, 'include_bias': 'True', 'interac... \n", - "10 None \n", - "11 {'degree': 2, 'include_bias': 'True', 'interac... \n", - "12 {'degree': 2, 'include_bias': 'False', 'intera... \n", - "13 {'degree': 2, 'include_bias': 'False', 'intera... \n", - "14 {'degree': 2, 'include_bias': 'False', 'intera... \n", - "15 {'degree': 3, 'include_bias': 'True', 'interac... \n", - "16 {'degree': 3, 'include_bias': 'True', 'interac... \n", - "17 None \n", - "18 None \n", + "6 None \n", + "7 None \n", + "8 {'degree': 3, 'include_bias': 'False', 'intera... \n", + "9 None \n", + "10 {'degree': 3, 'include_bias': 'True', 'interac... \n", + "11 {'degree': 3, 'include_bias': 'False', 'intera... \n", + "12 None \n", + "13 {'degree': 2, 'include_bias': 'True', 'interac... \n", + "14 {'degree': 3, 'include_bias': 'False', 'intera... \n", + "15 {'degree': 2, 'include_bias': 'False', 'intera... \n", + "16 {'degree': 2, 'include_bias': 'True', 'interac... \n", + "17 {'degree': 2, 'include_bias': 'False', 'intera... \n", + "18 {'degree': 3, 'include_bias': 'True', 'interac... 
\n", "19 None \n", "20 None \n", + "21 None \n", + "22 None \n", "\n", " learner_class \\\n", "0 sklearn.ensemble._forest.RandomForestClassifier \n", "1 sklearn.ensemble._forest.RandomForestClassifier \n", "2 sklearn.ensemble._forest.RandomForestClassifier \n", "3 sklearn.ensemble._forest.RandomForestClassifier \n", - "4 sklearn.ensemble._forest.ExtraTreesClassifier \n", - "5 sklearn.ensemble._forest.RandomForestClassifier \n", + "4 sklearn.ensemble._forest.RandomForestClassifier \n", + "5 sklearn.ensemble._forest.ExtraTreesClassifier \n", "6 sklearn.ensemble._forest.RandomForestClassifier \n", "7 sklearn.ensemble._forest.RandomForestClassifier \n", "8 sklearn.ensemble._forest.RandomForestClassifier \n", - "9 sklearn.ensemble._forest.RandomForestClassifier \n", + "9 sklearn.linear_model.LogisticRegression \n", "10 sklearn.ensemble._forest.RandomForestClassifier \n", "11 sklearn.ensemble._forest.RandomForestClassifier \n", "12 sklearn.ensemble._forest.RandomForestClassifier \n", @@ -2523,33 +2567,37 @@ "14 sklearn.ensemble._forest.RandomForestClassifier \n", "15 sklearn.ensemble._forest.RandomForestClassifier \n", "16 sklearn.ensemble._forest.RandomForestClassifier \n", - "17 sklearn.naive_bayes.BernoulliNB \n", - "18 sklearn.neural_network._multilayer_perceptron.... \n", - "19 sklearn.neighbors._classification.KNeighborsCl... \n", - "20 sklearn.tree._classes.DecisionTreeClassifier \n", + "17 sklearn.ensemble._forest.RandomForestClassifier \n", + "18 sklearn.ensemble._forest.RandomForestClassifier \n", + "19 sklearn.naive_bayes.BernoulliNB \n", + "20 sklearn.neural_network._multilayer_perceptron.... \n", + "21 sklearn.neighbors._classification.KNeighborsCl... \n", + "22 sklearn.tree._classes.DecisionTreeClassifier \n", "\n", " learner_hps \n", "0 None \n", "1 None \n", "2 None \n", "3 None \n", - "4 None \n", + "4 {'bootstrap': 'False', 'criterion': 'gini', 'm... \n", "5 None \n", - "6 {'bootstrap': 'False', 'criterion': 'entropy',... 
\n", - "7 {'bootstrap': 'False', 'criterion': 'gini', 'm... \n", - "8 {'bootstrap': 'True', 'criterion': 'gini', 'ma... \n", - "9 {'bootstrap': 'True', 'criterion': 'gini', 'ma... \n", - "10 None \n", - "11 {'bootstrap': 'False', 'criterion': 'gini', 'm... \n", - "12 {'bootstrap': 'True', 'criterion': 'entropy', ... \n", + "6 None \n", + "7 None \n", + "8 {'bootstrap': 'False', 'criterion': 'gini', 'm... \n", + "9 None \n", + "10 {'bootstrap': 'False', 'criterion': 'entropy',... \n", + "11 {'bootstrap': 'True', 'criterion': 'entropy', ... \n", + "12 None \n", "13 {'bootstrap': 'False', 'criterion': 'gini', 'm... \n", - "14 {'bootstrap': 'True', 'criterion': 'gini', 'ma... \n", - "15 {'bootstrap': 'False', 'criterion': 'gini', 'm... \n", + "14 {'bootstrap': 'True', 'criterion': 'entropy', ... \n", + "15 {'bootstrap': 'False', 'criterion': 'entropy',... \n", "16 {'bootstrap': 'False', 'criterion': 'entropy',... \n", - "17 None \n", - "18 None \n", + "17 {'bootstrap': 'False', 'criterion': 'entropy',... \n", + "18 {'bootstrap': 'False', 'criterion': 'gini', 'm... \n", "19 None \n", - "20 None " + "20 None \n", + "21 None \n", + "22 None " ] }, "execution_count": 9, @@ -2571,7 +2619,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -2632,23 +2680,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "2024-05-27 21:54:55,084 - naiveautoml - INFO - Automatically inferred task type: classification\n", - "2024-05-27 21:54:55,121 - naiveautoml - INFO - There are 0 categorical features, which will be binarized.\n", - "2024-05-27 21:54:55,122 - naiveautoml - INFO - Missing values for the different attributes are [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0].\n", - "2024-05-27 21:54:55,122 - naiveautoml - INFO - Optimizing pipeline under the following conditions.\n", + "2024-10-07 20:36:21,312 - naiveautoml - INFO - Automatically inferred task type: classification\n", + "2024-10-07 20:36:21,342 - naiveautoml - INFO - There are 0 categorical features, which will be binarized.\n", + "2024-10-07 20:36:21,342 - naiveautoml - INFO - Missing values for the different attributes are [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0].\n", + "2024-10-07 20:36:21,343 - naiveautoml - INFO - Optimizing pipeline under the following conditions.\n", " \n", " Input type: (sparse: False)\n", - " Input shape: (426, 30)\n", - " Target type: (sparse: False)\n", - " Target shape: (426,).\n", - " Scoring: {'name': 'roc_auc', 'fun': make_scorer(roc_auc_score, response_method=('decision_function', 'predict_proba'))}\n", - " Other scorings computed: []\n", - " Timeout Overall: 30\n", - " Timeout per Candidate: 300\n", - " Max HPO iterations: 1000\n", - " Max HPO iterations w/o improvement: 100\n", - " Max HPO time (s) w/o improvement: 1800\n", - " \n", + " Input shape: (426, 30)\n", + " Target type: (sparse: False)\n", + " Target shape: (426,).\n", + " Scoring: {'name': 'roc_auc', 'fun': make_scorer(roc_auc_score, response_method=('decision_function', 'predict_proba'))}\n", + " Other scorings computed: []\n", + " Timeout Overall: 30\n", + " Timeout per Candidate: 300\n", + " Max HPO iterations: 1000\n", + " Max HPO iterations w/o improvement: 100\n", + " Max HPO time (s) w/o 
improvement: 1800\n", + " \n", "data-pre-processor\n", "\tsklearn.preprocessing._data.MinMaxScaler\n", "\tsklearn.preprocessing._data.Normalizer\n", @@ -2681,11 +2729,12 @@ "\tsklearn.svm._classes.SVC\n", "\tsklearn.svm._classes.SVC\n", "\tsklearn.svm._classes.SVC\n", + "\tsklearn.linear_model.LogisticRegression\n", "\tsklearn.neural_network._multilayer_perceptron.MLPClassifier\n", "\tsklearn.naive_bayes.MultinomialNBRandom HPO\n", - "2024-05-27 21:54:55,123 - naiveautoml - INFO - --------------------------------------------------\n", - "2024-05-27 21:54:55,124 - naiveautoml - INFO - Choosing Algorithm for each slot\n", - "2024-05-27 21:54:55,124 - naiveautoml - INFO - --------------------------------------------------\n" + "2024-10-07 20:36:21,344 - naiveautoml - INFO - --------------------------------------------------\n", + "2024-10-07 20:36:21,344 - naiveautoml - INFO - Choosing Algorithm for each slot\n", + "2024-10-07 20:36:21,345 - naiveautoml - INFO - --------------------------------------------------\n" ] }, { @@ -2703,106 +2752,108 @@ "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 0/31 [00:00\n", " \n", " 0\n", - " 1.716840e+09\n", - " 0.582555\n", + " 1.728326e+09\n", + " 0.555445\n", " (ExtraTreesClassifier())\n", " True\n", - " 0.9915\n", + " 0.9909\n", " True\n", - " {'roc_auc': [1.0, 0.9814179531160664, 0.997141...\n", + " {'roc_auc': [0.9866908650937689, 0.97973381730...\n", " ok\n", " None\n", " None\n", @@ -3079,13 +3014,13 @@ " \n", " \n", " 1\n", - " 1.716840e+09\n", - " 1.153195\n", + " 1.728326e+09\n", + " 0.948848\n", " (RandomForestClassifier())\n", " True\n", - " 0.9961\n", + " 0.9939\n", " True\n", - " {'roc_auc': [0.9902801600914808, 1.0, 0.995997...\n", + " {'roc_auc': [0.9945553539019965, 0.98396854204...\n", " ok\n", " None\n", " None\n", @@ -3097,13 +3032,13 @@ " \n", " \n", " 2\n", - " 1.716840e+09\n", - " 1.270579\n", + " 1.728326e+09\n", + " 0.794000\n", " (HistGradientBoostingClassifier())\n", " True\n", - " 
0.9969\n", + " 0.9972\n", " True\n", - " {'roc_auc': [0.9988564894225272, 1.0, 0.993710...\n", + " {'roc_auc': [0.9993950393224441, 0.99334543254...\n", " ok\n", " None\n", " None\n", @@ -3115,13 +3050,13 @@ " \n", " \n", " 3\n", - " 1.716840e+09\n", - " 0.036881\n", + " 1.728326e+09\n", + " 0.021051\n", " (BernoulliNB())\n", " True\n", - " 0.5132\n", + " 0.5158\n", " False\n", - " {'roc_auc': [0.5188679245283019, 0.50943396226...\n", + " {'roc_auc': [0.5, 0.5263157894736842, 0.517543...\n", " ok\n", " None\n", " None\n", @@ -3133,13 +3068,13 @@ " \n", " \n", " 4\n", - " 1.716840e+09\n", - " 0.060323\n", + " 1.728326e+09\n", + " 0.041461\n", " (DecisionTreeClassifier())\n", " True\n", - " 0.9013\n", + " 0.9152\n", " False\n", - " {'roc_auc': [0.9545454545454545, 0.88650657518...\n", + " {'roc_auc': [0.939201451905626, 0.939503932244...\n", " ok\n", " None\n", " None\n", @@ -3151,13 +3086,13 @@ " \n", " \n", " 5\n", - " 1.716840e+09\n", - " 0.032205\n", + " 1.728326e+09\n", + " 0.017106\n", " (GaussianNB())\n", " True\n", - " 0.9903\n", + " 0.9925\n", " False\n", - " {'roc_auc': [0.9914236706689536, 0.99828473413...\n", + " {'roc_auc': [0.9891107078039928, 0.99213551119...\n", " ok\n", " None\n", " None\n", @@ -3169,13 +3104,13 @@ " \n", " \n", " 6\n", - " 1.716840e+09\n", - " 0.044195\n", + " 1.728326e+09\n", + " 0.184520\n", " (KNeighborsClassifier())\n", " True\n", - " 0.9678\n", + " 0.9461\n", " False\n", - " {'roc_auc': [0.9751286449399656, 0.98284734133...\n", + " {'roc_auc': [0.9552329098608591, 0.93768905021...\n", " ok\n", " None\n", " None\n", @@ -3187,13 +3122,13 @@ " \n", " \n", " 7\n", - " 1.716840e+09\n", - " 0.043597\n", + " 1.728326e+09\n", + " 0.058902\n", " (LinearDiscriminantAnalysis())\n", " True\n", - " 0.9914\n", + " 0.9909\n", " False\n", - " {'roc_auc': [0.9857061177815895, 0.99599771297...\n", + " {'roc_auc': [0.9963702359346642, 0.96491228070...\n", " ok\n", " None\n", " None\n", @@ -3205,13 +3140,13 @@ " \n", " \n", " 8\n", - " 
1.716840e+09\n", - " 0.032398\n", + " 1.728326e+09\n", + " 0.040780\n", " (QuadraticDiscriminantAnalysis())\n", " True\n", - " 0.9863\n", + " 0.9864\n", " False\n", - " {'roc_auc': [0.9965694682675814, 0.98399085191...\n", + " {'roc_auc': [0.9624924379915305, 0.99758015728...\n", " ok\n", " None\n", " None\n", @@ -3223,13 +3158,13 @@ " \n", " \n", " 9\n", - " 1.716840e+09\n", - " 3.127650\n", + " 1.728326e+09\n", + " 3.260550\n", " (SVC(kernel='linear'))\n", " True\n", - " 0.9868\n", + " 0.9926\n", " False\n", - " {'roc_auc': [0.9977129788450544, 0.97255574614...\n", + " {'roc_auc': [0.9975801572897761, 0.98850574712...\n", " ok\n", " None\n", " None\n", @@ -3241,13 +3176,13 @@ " \n", " \n", " 10\n", - " 1.716840e+09\n", - " 0.060098\n", + " 1.728326e+09\n", + " 0.055493\n", " (SVC())\n", " True\n", - " 0.9721\n", + " 0.9811\n", " False\n", - " {'roc_auc': [0.9685534591194969, 0.94568324757...\n", + " {'roc_auc': [0.9879007864488808, 0.98427102238...\n", " ok\n", " None\n", " None\n", @@ -3259,13 +3194,13 @@ " \n", " \n", " 11\n", - " 1.716840e+09\n", - " 0.066071\n", + " 1.728326e+09\n", + " 0.060643\n", " (SVC(kernel='poly'))\n", " True\n", - " 0.9680\n", + " 0.9787\n", " False\n", - " {'roc_auc': [0.9576901086335048, 0.94053744997...\n", + " {'roc_auc': [0.9885057471264368, 0.98366606170...\n", " ok\n", " None\n", " None\n", @@ -3277,13 +3212,13 @@ " \n", " \n", " 12\n", - " 1.716840e+09\n", - " 0.093112\n", + " 1.728326e+09\n", + " 0.085472\n", " (SVC(kernel='sigmoid'))\n", " True\n", - " 0.2492\n", + " 0.2052\n", " False\n", - " {'roc_auc': [0.3602058319039452, 0.26357918810...\n", + " {'roc_auc': [0.21536600120992133, 0.1657592256...\n", " ok\n", " None\n", " None\n", @@ -3295,13 +3230,31 @@ " \n", " \n", " 13\n", - " 1.716840e+09\n", - " 1.389846\n", + " 1.728326e+09\n", + " 0.481201\n", + " (LogisticRegression())\n", + " True\n", + " 0.9901\n", + " False\n", + " {'roc_auc': [0.9939503932244403, 0.98971566848...\n", + " ok\n", + " None\n", + " None\n", + " 
None\n", + " None\n", + " None\n", + " sklearn.linear_model.LogisticRegression\n", + " None\n", + " \n", + " \n", + " 14\n", + " 1.728326e+09\n", + " 4.706863\n", " (MLPClassifier())\n", " True\n", - " 0.9639\n", + " 0.9721\n", " False\n", - " {'roc_auc': [0.9759862778730704, 0.96455117209...\n", + " {'roc_auc': [0.9370840895341803, 0.98850574712...\n", " ok\n", " None\n", " None\n", @@ -3312,14 +3265,14 @@ " None\n", " \n", " \n", - " 14\n", - " 1.716840e+09\n", - " 0.024595\n", + " 15\n", + " 1.728326e+09\n", + " 0.049075\n", " (MultinomialNB())\n", " True\n", - " 0.9485\n", + " 0.9576\n", " False\n", - " {'roc_auc': [0.9522584333905089, 0.94511149228...\n", + " {'roc_auc': [0.984875983061101, 0.977011494252...\n", " ok\n", " None\n", " None\n", @@ -3330,9 +3283,9 @@ " None\n", " \n", " \n", - " 15\n", - " 1.716840e+09\n", - " 0.006696\n", + " 16\n", + " 1.728326e+09\n", + " 0.015053\n", " (MinMaxScaler(), HistGradientBoostingClassifie...\n", " True\n", " NaN\n", @@ -3348,14 +3301,14 @@ " None\n", " \n", " \n", - " 16\n", - " 1.716840e+09\n", - " 0.944081\n", + " 17\n", + " 1.728326e+09\n", + " 0.889329\n", " (Normalizer(), HistGradientBoostingClassifier())\n", " True\n", - " 0.9898\n", + " 0.9906\n", " False\n", - " {'roc_auc': [1.0, 0.9925671812464265, 0.998284...\n", + " {'roc_auc': [0.9770114942528735, 0.99153055051...\n", " ok\n", " None\n", " sklearn.preprocessing._data.Normalizer\n", @@ -3366,9 +3319,9 @@ " None\n", " \n", " \n", - " 17\n", - " 1.716840e+09\n", - " 0.005763\n", + " 18\n", + " 1.728326e+09\n", + " 0.004544\n", " (PowerTransformer(), HistGradientBoostingClass...\n", " True\n", " NaN\n", @@ -3384,9 +3337,9 @@ " None\n", " \n", " \n", - " 18\n", - " 1.716840e+09\n", - " 0.009366\n", + " 19\n", + " 1.728326e+09\n", + " 0.004509\n", " (QuantileTransformer(), HistGradientBoostingCl...\n", " True\n", " NaN\n", @@ -3402,9 +3355,9 @@ " None\n", " \n", " \n", - " 19\n", - " 1.716840e+09\n", - " 0.011938\n", + " 20\n", + " 1.728326e+09\n", + " 
0.005793\n", " (RobustScaler(), HistGradientBoostingClassifie...\n", " True\n", " NaN\n", @@ -3420,9 +3373,9 @@ " None\n", " \n", " \n", - " 20\n", - " 1.716840e+09\n", - " 0.008092\n", + " 21\n", + " 1.728326e+09\n", + " 0.006627\n", " (StandardScaler(), HistGradientBoostingClassif...\n", " True\n", " NaN\n", @@ -3438,14 +3391,14 @@ " None\n", " \n", " \n", - " 21\n", - " 1.716840e+09\n", - " 0.947790\n", + " 22\n", + " 1.728326e+09\n", + " 0.886483\n", " (VarianceThreshold(), HistGradientBoostingClas...\n", " True\n", - " 0.9887\n", - " False\n", - " {'roc_auc': [0.9765580331618068, 0.99656946826...\n", + " 0.9983\n", + " True\n", + " {'roc_auc': [0.9963702359346642, 1.0, 0.998185...\n", " ok\n", " None\n", " sklearn.feature_selection._variance_threshold....\n", @@ -3456,17 +3409,17 @@ " None\n", " \n", " \n", - " 22\n", - " 1.716840e+09\n", - " 1.564505\n", - " (FastICA(), HistGradientBoostingClassifier())\n", + " 23\n", + " 1.728326e+09\n", + " 3.913556\n", + " (VarianceThreshold(), FastICA(), HistGradientB...\n", " True\n", - " 0.9456\n", + " 0.9653\n", " False\n", - " {'roc_auc': [0.9210977701543739, 0.94225271583...\n", + " {'roc_auc': [0.9897156684815487, 0.95341802782...\n", " ok\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " sklearn.decomposition._fastica.FastICA\n", " None\n", @@ -3474,17 +3427,17 @@ " None\n", " \n", " \n", - " 23\n", - " 1.716840e+09\n", - " 0.009267\n", - " (FeatureAgglomeration(), HistGradientBoostingC...\n", + " 24\n", + " 1.728326e+09\n", + " 0.005385\n", + " (VarianceThreshold(), FeatureAgglomeration(), ...\n", " True\n", " NaN\n", " False\n", " None\n", " avoided\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " sklearn.cluster._agglomerative.FeatureAgglomer...\n", " None\n", @@ -3492,17 +3445,17 @@ " None\n", " \n", " \n", - " 24\n", - " 1.716840e+09\n", - " 0.008169\n", - " (KernelPCA(), HistGradientBoostingClassifier())\n", + " 
25\n", + " 1.728326e+09\n", + " 0.004398\n", + " (VarianceThreshold(), KernelPCA(), HistGradien...\n", " True\n", " NaN\n", " False\n", " None\n", " avoided\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " sklearn.decomposition._kernel_pca.KernelPCA\n", " None\n", @@ -3510,17 +3463,17 @@ " None\n", " \n", " \n", - " 25\n", - " 1.716840e+09\n", - " 0.005599\n", - " (RBFSampler(), HistGradientBoostingClassifier())\n", + " 26\n", + " 1.728326e+09\n", + " 0.004593\n", + " (VarianceThreshold(), RBFSampler(), HistGradie...\n", " True\n", " NaN\n", " False\n", " None\n", " avoided\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " sklearn.kernel_approximation.RBFSampler\n", " None\n", @@ -3528,17 +3481,17 @@ " None\n", " \n", " \n", - " 26\n", - " 1.716840e+09\n", - " 0.007714\n", - " (Nystroem(), HistGradientBoostingClassifier())\n", + " 27\n", + " 1.728326e+09\n", + " 0.004105\n", + " (VarianceThreshold(), Nystroem(), HistGradient...\n", " True\n", " NaN\n", " False\n", " None\n", " avoided\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " sklearn.kernel_approximation.Nystroem\n", " None\n", @@ -3546,17 +3499,17 @@ " None\n", " \n", " \n", - " 27\n", - " 1.716840e+09\n", - " 0.963981\n", - " (PCA(), HistGradientBoostingClassifier())\n", + " 28\n", + " 1.728326e+09\n", + " 1.395434\n", + " (VarianceThreshold(), PCA(), HistGradientBoost...\n", " True\n", - " 0.9874\n", + " 0.9904\n", " False\n", - " {'roc_auc': [0.978273299028016, 0.994282447112...\n", + " {'roc_auc': [0.9981851179673321, 0.99637023593...\n", " ok\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " sklearn.decomposition._pca.PCA\n", " None\n", @@ -3564,17 +3517,17 @@ " None\n", " \n", " \n", - " 28\n", - " 1.716840e+09\n", - " 0.008186\n", - " (PolynomialFeatures(), HistGradientBoostingCla...\n", + " 29\n", + " 
1.728326e+09\n", + " 0.004117\n", + " (VarianceThreshold(), PolynomialFeatures(), Hi...\n", " True\n", " NaN\n", " False\n", " None\n", " avoided\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " sklearn.preprocessing._polynomial.PolynomialFe...\n", " None\n", @@ -3582,17 +3535,17 @@ " None\n", " \n", " \n", - " 29\n", - " 1.716840e+09\n", - " 0.680890\n", - " (SelectPercentile(), HistGradientBoostingClass...\n", + " 30\n", + " 1.728326e+09\n", + " 0.572134\n", + " (VarianceThreshold(), SelectPercentile(), Hist...\n", " True\n", - " 0.9816\n", + " 0.9817\n", " False\n", - " {'roc_auc': [0.9942824471126358, 0.97284162378...\n", + " {'roc_auc': [0.9879007864488808, 0.95583787053...\n", " ok\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " sklearn.feature_selection._univariate_selectio...\n", " None\n", @@ -3600,17 +3553,17 @@ " None\n", " \n", " \n", - " 30\n", - " 1.716840e+09\n", - " 0.620914\n", - " (GenericUnivariateSelect(), HistGradientBoosti...\n", + " 31\n", + " 1.728326e+09\n", + " 0.551066\n", + " (VarianceThreshold(), GenericUnivariateSelect(...\n", " True\n", - " 0.9413\n", + " 0.9283\n", " False\n", - " {'roc_auc': [0.9451114922813036, 0.95683247570...\n", + " {'roc_auc': [0.8974591651542649, 0.93829401088...\n", " ok\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " sklearn.feature_selection._univariate_selectio...\n", " None\n", @@ -3618,184 +3571,58 @@ " None\n", " \n", " \n", - " 31\n", - " 1.716840e+09\n", - " 0.288856\n", - " (HistGradientBoostingClassifier(early_stopping...\n", - " False\n", - " 0.9903\n", - " True\n", - " {'roc_auc': [0.9988564894225271, 0.97484276729...\n", - " ok\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " sklearn.ensemble.HistGradientBoostingClassifier\n", - " {'early_stop': 'valid', 'l2_regularization': 0...\n", - " \n", - " \n", " 32\n", - " 
1.716840e+09\n", - " 0.194818\n", - " (HistGradientBoostingClassifier(early_stopping...\n", + " 1.728326e+09\n", + " 1.881600\n", + " (VarianceThreshold(), HistGradientBoostingClas...\n", " False\n", - " 0.9740\n", + " 0.9953\n", " True\n", - " {'roc_auc': [0.9765580331618068, 0.98456260720...\n", + " {'roc_auc': [0.9975801572897761, 0.98850574712...\n", " ok\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " None\n", " None\n", " sklearn.ensemble.HistGradientBoostingClassifier\n", - " {'early_stop': 'valid', 'l2_regularization': 3...\n", + " {'early_stop': 'valid', 'l2_regularization': 6...\n", " \n", " \n", " 33\n", - " 1.716840e+09\n", - " 1.836760\n", - " (HistGradientBoostingClassifier(early_stopping...\n", + " 1.728326e+09\n", + " 3.470177\n", + " (VarianceThreshold(), HistGradientBoostingClas...\n", " False\n", - " 0.9842\n", + " 0.9854\n", " True\n", - " {'roc_auc': [0.9817038307604345, 0.98570611778...\n", + " {'roc_auc': [0.9764065335753176, 0.99576527525...\n", " ok\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " None\n", " None\n", " sklearn.ensemble.HistGradientBoostingClassifier\n", - " {'early_stop': 'off', 'l2_regularization': 4.9...\n", + " {'early_stop': 'off', 'l2_regularization': 1.2...\n", " \n", " \n", " 34\n", - " 1.716840e+09\n", - " 3.107489\n", - " (HistGradientBoostingClassifier(early_stopping...\n", - " False\n", - " 0.9816\n", - " True\n", - " {'roc_auc': [0.9731275014293883, 0.96112064036...\n", - " ok\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " sklearn.ensemble.HistGradientBoostingClassifier\n", - " {'early_stop': 'off', 'l2_regularization': 0.2...\n", - " \n", - " \n", - " 35\n", - " 1.716840e+09\n", - " 3.310561\n", - " (HistGradientBoostingClassifier(early_stopping...\n", - " False\n", - " 0.9931\n", - " True\n", - " {'roc_auc': [1.0, 0.9834190966266438, 1.0, 0.9...\n", - " ok\n", - " None\n", - " None\n", 
- " None\n", - " None\n", - " None\n", - " sklearn.ensemble.HistGradientBoostingClassifier\n", - " {'early_stop': 'train', 'l2_regularization': 0...\n", - " \n", - " \n", - " 36\n", - " 1.716840e+09\n", - " 0.553527\n", - " (HistGradientBoostingClassifier(early_stopping...\n", - " False\n", - " 0.9923\n", - " True\n", - " {'roc_auc': [0.9994282447112637, 0.98713550600...\n", - " ok\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " sklearn.ensemble.HistGradientBoostingClassifier\n", - " {'early_stop': 'valid', 'l2_regularization': 9...\n", - " \n", - " \n", - " 37\n", - " 1.716840e+09\n", - " 3.286090\n", - " (HistGradientBoostingClassifier(early_stopping...\n", - " False\n", - " 0.9965\n", - " True\n", - " {'roc_auc': [0.9937106918238994, 0.99428244711...\n", - " ok\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " sklearn.ensemble.HistGradientBoostingClassifier\n", - " {'early_stop': 'train', 'l2_regularization': 0...\n", - " \n", - " \n", - " 38\n", - " 1.716840e+09\n", - " 0.182795\n", - " (HistGradientBoostingClassifier(early_stopping...\n", - " False\n", - " 0.9715\n", - " True\n", - " {'roc_auc': [0.9794168096054888, 0.99828473413...\n", - " ok\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " sklearn.ensemble.HistGradientBoostingClassifier\n", - " {'early_stop': 'valid', 'l2_regularization': 0...\n", - " \n", - " \n", - " 39\n", - " 1.716840e+09\n", - " 1.404757\n", - " (HistGradientBoostingClassifier(early_stopping...\n", - " False\n", - " 0.9850\n", - " True\n", - " {'roc_auc': [0.9736992567181246, 0.99599771297...\n", - " ok\n", - " None\n", - " None\n", - " None\n", - " None\n", - " None\n", - " sklearn.ensemble.HistGradientBoostingClassifier\n", - " {'early_stop': 'off', 'l2_regularization': 8.3...\n", - " \n", - " \n", - " 40\n", - " 1.716840e+09\n", - " 2.862135\n", - " (HistGradientBoostingClassifier(early_stopping...\n", + " 1.728326e+09\n", + " 3.190365\n", + " 
(VarianceThreshold(), HistGradientBoostingClas...\n", " False\n", - " 0.9868\n", + " 0.9947\n", " True\n", - " {'roc_auc': [0.982275586049171, 0.991423670668...\n", + " {'roc_auc': [0.9830611010284331, 0.99879007864...\n", " ok\n", " None\n", - " None\n", + " sklearn.feature_selection._variance_threshold....\n", " None\n", " None\n", " None\n", " sklearn.ensemble.HistGradientBoostingClassifier\n", - " {'early_stop': 'off', 'l2_regularization': 0.1...\n", + " {'early_stop': 'off', 'l2_regularization': 0.3...\n", " \n", " \n", "\n", @@ -3803,133 +3630,115 @@ ], "text/plain": [ " time runtime pipeline \\\n", - "0 1.716840e+09 0.582555 (ExtraTreesClassifier()) \n", - "1 1.716840e+09 1.153195 (RandomForestClassifier()) \n", - "2 1.716840e+09 1.270579 (HistGradientBoostingClassifier()) \n", - "3 1.716840e+09 0.036881 (BernoulliNB()) \n", - "4 1.716840e+09 0.060323 (DecisionTreeClassifier()) \n", - "5 1.716840e+09 0.032205 (GaussianNB()) \n", - "6 1.716840e+09 0.044195 (KNeighborsClassifier()) \n", - "7 1.716840e+09 0.043597 (LinearDiscriminantAnalysis()) \n", - "8 1.716840e+09 0.032398 (QuadraticDiscriminantAnalysis()) \n", - "9 1.716840e+09 3.127650 (SVC(kernel='linear')) \n", - "10 1.716840e+09 0.060098 (SVC()) \n", - "11 1.716840e+09 0.066071 (SVC(kernel='poly')) \n", - "12 1.716840e+09 0.093112 (SVC(kernel='sigmoid')) \n", - "13 1.716840e+09 1.389846 (MLPClassifier()) \n", - "14 1.716840e+09 0.024595 (MultinomialNB()) \n", - "15 1.716840e+09 0.006696 (MinMaxScaler(), HistGradientBoostingClassifie... \n", - "16 1.716840e+09 0.944081 (Normalizer(), HistGradientBoostingClassifier()) \n", - "17 1.716840e+09 0.005763 (PowerTransformer(), HistGradientBoostingClass... \n", - "18 1.716840e+09 0.009366 (QuantileTransformer(), HistGradientBoostingCl... \n", - "19 1.716840e+09 0.011938 (RobustScaler(), HistGradientBoostingClassifie... \n", - "20 1.716840e+09 0.008092 (StandardScaler(), HistGradientBoostingClassif... 
\n", - "21 1.716840e+09 0.947790 (VarianceThreshold(), HistGradientBoostingClas... \n", - "22 1.716840e+09 1.564505 (FastICA(), HistGradientBoostingClassifier()) \n", - "23 1.716840e+09 0.009267 (FeatureAgglomeration(), HistGradientBoostingC... \n", - "24 1.716840e+09 0.008169 (KernelPCA(), HistGradientBoostingClassifier()) \n", - "25 1.716840e+09 0.005599 (RBFSampler(), HistGradientBoostingClassifier()) \n", - "26 1.716840e+09 0.007714 (Nystroem(), HistGradientBoostingClassifier()) \n", - "27 1.716840e+09 0.963981 (PCA(), HistGradientBoostingClassifier()) \n", - "28 1.716840e+09 0.008186 (PolynomialFeatures(), HistGradientBoostingCla... \n", - "29 1.716840e+09 0.680890 (SelectPercentile(), HistGradientBoostingClass... \n", - "30 1.716840e+09 0.620914 (GenericUnivariateSelect(), HistGradientBoosti... \n", - "31 1.716840e+09 0.288856 (HistGradientBoostingClassifier(early_stopping... \n", - "32 1.716840e+09 0.194818 (HistGradientBoostingClassifier(early_stopping... \n", - "33 1.716840e+09 1.836760 (HistGradientBoostingClassifier(early_stopping... \n", - "34 1.716840e+09 3.107489 (HistGradientBoostingClassifier(early_stopping... \n", - "35 1.716840e+09 3.310561 (HistGradientBoostingClassifier(early_stopping... \n", - "36 1.716840e+09 0.553527 (HistGradientBoostingClassifier(early_stopping... \n", - "37 1.716840e+09 3.286090 (HistGradientBoostingClassifier(early_stopping... \n", - "38 1.716840e+09 0.182795 (HistGradientBoostingClassifier(early_stopping... \n", - "39 1.716840e+09 1.404757 (HistGradientBoostingClassifier(early_stopping... \n", - "40 1.716840e+09 2.862135 (HistGradientBoostingClassifier(early_stopping... 
\n", + "0 1.728326e+09 0.555445 (ExtraTreesClassifier()) \n", + "1 1.728326e+09 0.948848 (RandomForestClassifier()) \n", + "2 1.728326e+09 0.794000 (HistGradientBoostingClassifier()) \n", + "3 1.728326e+09 0.021051 (BernoulliNB()) \n", + "4 1.728326e+09 0.041461 (DecisionTreeClassifier()) \n", + "5 1.728326e+09 0.017106 (GaussianNB()) \n", + "6 1.728326e+09 0.184520 (KNeighborsClassifier()) \n", + "7 1.728326e+09 0.058902 (LinearDiscriminantAnalysis()) \n", + "8 1.728326e+09 0.040780 (QuadraticDiscriminantAnalysis()) \n", + "9 1.728326e+09 3.260550 (SVC(kernel='linear')) \n", + "10 1.728326e+09 0.055493 (SVC()) \n", + "11 1.728326e+09 0.060643 (SVC(kernel='poly')) \n", + "12 1.728326e+09 0.085472 (SVC(kernel='sigmoid')) \n", + "13 1.728326e+09 0.481201 (LogisticRegression()) \n", + "14 1.728326e+09 4.706863 (MLPClassifier()) \n", + "15 1.728326e+09 0.049075 (MultinomialNB()) \n", + "16 1.728326e+09 0.015053 (MinMaxScaler(), HistGradientBoostingClassifie... \n", + "17 1.728326e+09 0.889329 (Normalizer(), HistGradientBoostingClassifier()) \n", + "18 1.728326e+09 0.004544 (PowerTransformer(), HistGradientBoostingClass... \n", + "19 1.728326e+09 0.004509 (QuantileTransformer(), HistGradientBoostingCl... \n", + "20 1.728326e+09 0.005793 (RobustScaler(), HistGradientBoostingClassifie... \n", + "21 1.728326e+09 0.006627 (StandardScaler(), HistGradientBoostingClassif... \n", + "22 1.728326e+09 0.886483 (VarianceThreshold(), HistGradientBoostingClas... \n", + "23 1.728326e+09 3.913556 (VarianceThreshold(), FastICA(), HistGradientB... \n", + "24 1.728326e+09 0.005385 (VarianceThreshold(), FeatureAgglomeration(), ... \n", + "25 1.728326e+09 0.004398 (VarianceThreshold(), KernelPCA(), HistGradien... \n", + "26 1.728326e+09 0.004593 (VarianceThreshold(), RBFSampler(), HistGradie... \n", + "27 1.728326e+09 0.004105 (VarianceThreshold(), Nystroem(), HistGradient... \n", + "28 1.728326e+09 1.395434 (VarianceThreshold(), PCA(), HistGradientBoost... 
\n", + "29 1.728326e+09 0.004117 (VarianceThreshold(), PolynomialFeatures(), Hi... \n", + "30 1.728326e+09 0.572134 (VarianceThreshold(), SelectPercentile(), Hist... \n", + "31 1.728326e+09 0.551066 (VarianceThreshold(), GenericUnivariateSelect(... \n", + "32 1.728326e+09 1.881600 (VarianceThreshold(), HistGradientBoostingClas... \n", + "33 1.728326e+09 3.470177 (VarianceThreshold(), HistGradientBoostingClas... \n", + "34 1.728326e+09 3.190365 (VarianceThreshold(), HistGradientBoostingClas... \n", "\n", " default_hp roc_auc new_best \\\n", - "0 True 0.9915 True \n", - "1 True 0.9961 True \n", - "2 True 0.9969 True \n", - "3 True 0.5132 False \n", - "4 True 0.9013 False \n", - "5 True 0.9903 False \n", - "6 True 0.9678 False \n", - "7 True 0.9914 False \n", - "8 True 0.9863 False \n", - "9 True 0.9868 False \n", - "10 True 0.9721 False \n", - "11 True 0.9680 False \n", - "12 True 0.2492 False \n", - "13 True 0.9639 False \n", - "14 True 0.9485 False \n", - "15 True NaN False \n", - "16 True 0.9898 False \n", - "17 True NaN False \n", + "0 True 0.9909 True \n", + "1 True 0.9939 True \n", + "2 True 0.9972 True \n", + "3 True 0.5158 False \n", + "4 True 0.9152 False \n", + "5 True 0.9925 False \n", + "6 True 0.9461 False \n", + "7 True 0.9909 False \n", + "8 True 0.9864 False \n", + "9 True 0.9926 False \n", + "10 True 0.9811 False \n", + "11 True 0.9787 False \n", + "12 True 0.2052 False \n", + "13 True 0.9901 False \n", + "14 True 0.9721 False \n", + "15 True 0.9576 False \n", + "16 True NaN False \n", + "17 True 0.9906 False \n", "18 True NaN False \n", "19 True NaN False \n", "20 True NaN False \n", - "21 True 0.9887 False \n", - "22 True 0.9456 False \n", - "23 True NaN False \n", + "21 True NaN False \n", + "22 True 0.9983 True \n", + "23 True 0.9653 False \n", "24 True NaN False \n", "25 True NaN False \n", "26 True NaN False \n", - "27 True 0.9874 False \n", - "28 True NaN False \n", - "29 True 0.9816 False \n", - "30 True 0.9413 False \n", - "31 False 0.9903 
True \n", - "32 False 0.9740 True \n", - "33 False 0.9842 True \n", - "34 False 0.9816 True \n", - "35 False 0.9931 True \n", - "36 False 0.9923 True \n", - "37 False 0.9965 True \n", - "38 False 0.9715 True \n", - "39 False 0.9850 True \n", - "40 False 0.9868 True \n", + "27 True NaN False \n", + "28 True 0.9904 False \n", + "29 True NaN False \n", + "30 True 0.9817 False \n", + "31 True 0.9283 False \n", + "32 False 0.9953 True \n", + "33 False 0.9854 True \n", + "34 False 0.9947 True \n", "\n", " evaluation_report status exception \\\n", - "0 {'roc_auc': [1.0, 0.9814179531160664, 0.997141... ok None \n", - "1 {'roc_auc': [0.9902801600914808, 1.0, 0.995997... ok None \n", - "2 {'roc_auc': [0.9988564894225272, 1.0, 0.993710... ok None \n", - "3 {'roc_auc': [0.5188679245283019, 0.50943396226... ok None \n", - "4 {'roc_auc': [0.9545454545454545, 0.88650657518... ok None \n", - "5 {'roc_auc': [0.9914236706689536, 0.99828473413... ok None \n", - "6 {'roc_auc': [0.9751286449399656, 0.98284734133... ok None \n", - "7 {'roc_auc': [0.9857061177815895, 0.99599771297... ok None \n", - "8 {'roc_auc': [0.9965694682675814, 0.98399085191... ok None \n", - "9 {'roc_auc': [0.9977129788450544, 0.97255574614... ok None \n", - "10 {'roc_auc': [0.9685534591194969, 0.94568324757... ok None \n", - "11 {'roc_auc': [0.9576901086335048, 0.94053744997... ok None \n", - "12 {'roc_auc': [0.3602058319039452, 0.26357918810... ok None \n", - "13 {'roc_auc': [0.9759862778730704, 0.96455117209... ok None \n", - "14 {'roc_auc': [0.9522584333905089, 0.94511149228... ok None \n", - "15 None avoided None \n", - "16 {'roc_auc': [1.0, 0.9925671812464265, 0.998284... ok None \n", - "17 None avoided None \n", + "0 {'roc_auc': [0.9866908650937689, 0.97973381730... ok None \n", + "1 {'roc_auc': [0.9945553539019965, 0.98396854204... ok None \n", + "2 {'roc_auc': [0.9993950393224441, 0.99334543254... ok None \n", + "3 {'roc_auc': [0.5, 0.5263157894736842, 0.517543... 
ok None \n", + "4 {'roc_auc': [0.939201451905626, 0.939503932244... ok None \n", + "5 {'roc_auc': [0.9891107078039928, 0.99213551119... ok None \n", + "6 {'roc_auc': [0.9552329098608591, 0.93768905021... ok None \n", + "7 {'roc_auc': [0.9963702359346642, 0.96491228070... ok None \n", + "8 {'roc_auc': [0.9624924379915305, 0.99758015728... ok None \n", + "9 {'roc_auc': [0.9975801572897761, 0.98850574712... ok None \n", + "10 {'roc_auc': [0.9879007864488808, 0.98427102238... ok None \n", + "11 {'roc_auc': [0.9885057471264368, 0.98366606170... ok None \n", + "12 {'roc_auc': [0.21536600120992133, 0.1657592256... ok None \n", + "13 {'roc_auc': [0.9939503932244403, 0.98971566848... ok None \n", + "14 {'roc_auc': [0.9370840895341803, 0.98850574712... ok None \n", + "15 {'roc_auc': [0.984875983061101, 0.977011494252... ok None \n", + "16 None avoided None \n", + "17 {'roc_auc': [0.9770114942528735, 0.99153055051... ok None \n", "18 None avoided None \n", "19 None avoided None \n", "20 None avoided None \n", - "21 {'roc_auc': [0.9765580331618068, 0.99656946826... ok None \n", - "22 {'roc_auc': [0.9210977701543739, 0.94225271583... ok None \n", - "23 None avoided None \n", + "21 None avoided None \n", + "22 {'roc_auc': [0.9963702359346642, 1.0, 0.998185... ok None \n", + "23 {'roc_auc': [0.9897156684815487, 0.95341802782... ok None \n", "24 None avoided None \n", "25 None avoided None \n", "26 None avoided None \n", - "27 {'roc_auc': [0.978273299028016, 0.994282447112... ok None \n", - "28 None avoided None \n", - "29 {'roc_auc': [0.9942824471126358, 0.97284162378... ok None \n", - "30 {'roc_auc': [0.9451114922813036, 0.95683247570... ok None \n", - "31 {'roc_auc': [0.9988564894225271, 0.97484276729... ok None \n", - "32 {'roc_auc': [0.9765580331618068, 0.98456260720... ok None \n", - "33 {'roc_auc': [0.9817038307604345, 0.98570611778... ok None \n", - "34 {'roc_auc': [0.9731275014293883, 0.96112064036... ok None \n", - "35 {'roc_auc': [1.0, 0.9834190966266438, 1.0, 0.9... 
ok None \n", - "36 {'roc_auc': [0.9994282447112637, 0.98713550600... ok None \n", - "37 {'roc_auc': [0.9937106918238994, 0.99428244711... ok None \n", - "38 {'roc_auc': [0.9794168096054888, 0.99828473413... ok None \n", - "39 {'roc_auc': [0.9736992567181246, 0.99599771297... ok None \n", - "40 {'roc_auc': [0.982275586049171, 0.991423670668... ok None \n", + "27 None avoided None \n", + "28 {'roc_auc': [0.9981851179673321, 0.99637023593... ok None \n", + "29 None avoided None \n", + "30 {'roc_auc': [0.9879007864488808, 0.95583787053... ok None \n", + "31 {'roc_auc': [0.8974591651542649, 0.93829401088... ok None \n", + "32 {'roc_auc': [0.9975801572897761, 0.98850574712... ok None \n", + "33 {'roc_auc': [0.9764065335753176, 0.99576527525... ok None \n", + "34 {'roc_auc': [0.9830611010284331, 0.99879007864... ok None \n", "\n", " data-pre-processor_class data-pre-processor_hps \\\n", "0 None None \n", @@ -3947,32 +3756,26 @@ "12 None None \n", "13 None None \n", "14 None None \n", - "15 sklearn.preprocessing._data.MinMaxScaler None \n", - "16 sklearn.preprocessing._data.Normalizer None \n", - "17 sklearn.preprocessing._data.PowerTransformer None \n", - "18 sklearn.preprocessing._data.QuantileTransformer None \n", - "19 sklearn.preprocessing._data.RobustScaler None \n", - "20 sklearn.preprocessing._data.StandardScaler None \n", - "21 sklearn.feature_selection._variance_threshold.... 
None \n", - "22 None None \n", - "23 None None \n", - "24 None None \n", - "25 None None \n", - "26 None None \n", - "27 None None \n", - "28 None None \n", - "29 None None \n", - "30 None None \n", - "31 None None \n", - "32 None None \n", - "33 None None \n", - "34 None None \n", - "35 None None \n", - "36 None None \n", - "37 None None \n", - "38 None None \n", - "39 None None \n", - "40 None None \n", + "15 None None \n", + "16 sklearn.preprocessing._data.MinMaxScaler None \n", + "17 sklearn.preprocessing._data.Normalizer None \n", + "18 sklearn.preprocessing._data.PowerTransformer None \n", + "19 sklearn.preprocessing._data.QuantileTransformer None \n", + "20 sklearn.preprocessing._data.RobustScaler None \n", + "21 sklearn.preprocessing._data.StandardScaler None \n", + "22 sklearn.feature_selection._variance_threshold.... None \n", + "23 sklearn.feature_selection._variance_threshold.... None \n", + "24 sklearn.feature_selection._variance_threshold.... None \n", + "25 sklearn.feature_selection._variance_threshold.... None \n", + "26 sklearn.feature_selection._variance_threshold.... None \n", + "27 sklearn.feature_selection._variance_threshold.... None \n", + "28 sklearn.feature_selection._variance_threshold.... None \n", + "29 sklearn.feature_selection._variance_threshold.... None \n", + "30 sklearn.feature_selection._variance_threshold.... None \n", + "31 sklearn.feature_selection._variance_threshold.... None \n", + "32 sklearn.feature_selection._variance_threshold.... None \n", + "33 sklearn.feature_selection._variance_threshold.... None \n", + "34 sklearn.feature_selection._variance_threshold.... None \n", "\n", " feature-pre-processor_class \\\n", "0 None \n", @@ -3997,25 +3800,19 @@ "19 None \n", "20 None \n", "21 None \n", - "22 sklearn.decomposition._fastica.FastICA \n", - "23 sklearn.cluster._agglomerative.FeatureAgglomer... 
\n", - "24 sklearn.decomposition._kernel_pca.KernelPCA \n", - "25 sklearn.kernel_approximation.RBFSampler \n", - "26 sklearn.kernel_approximation.Nystroem \n", - "27 sklearn.decomposition._pca.PCA \n", - "28 sklearn.preprocessing._polynomial.PolynomialFe... \n", - "29 sklearn.feature_selection._univariate_selectio... \n", + "22 None \n", + "23 sklearn.decomposition._fastica.FastICA \n", + "24 sklearn.cluster._agglomerative.FeatureAgglomer... \n", + "25 sklearn.decomposition._kernel_pca.KernelPCA \n", + "26 sklearn.kernel_approximation.RBFSampler \n", + "27 sklearn.kernel_approximation.Nystroem \n", + "28 sklearn.decomposition._pca.PCA \n", + "29 sklearn.preprocessing._polynomial.PolynomialFe... \n", "30 sklearn.feature_selection._univariate_selectio... \n", - "31 None \n", + "31 sklearn.feature_selection._univariate_selectio... \n", "32 None \n", "33 None \n", "34 None \n", - "35 None \n", - "36 None \n", - "37 None \n", - "38 None \n", - "39 None \n", - "40 None \n", "\n", " feature-pre-processor_hps \\\n", "0 None \n", @@ -4053,12 +3850,6 @@ "32 None \n", "33 None \n", "34 None \n", - "35 None \n", - "36 None \n", - "37 None \n", - "38 None \n", - "39 None \n", - "40 None \n", "\n", " learner_class \\\n", "0 sklearn.ensemble._forest.ExtraTreesClassifier \n", @@ -4074,9 +3865,9 @@ "10 sklearn.svm._classes.SVC \n", "11 sklearn.svm._classes.SVC \n", "12 sklearn.svm._classes.SVC \n", - "13 sklearn.neural_network._multilayer_perceptron.... \n", - "14 sklearn.naive_bayes.MultinomialNB \n", - "15 sklearn.ensemble.HistGradientBoostingClassifier \n", + "13 sklearn.linear_model.LogisticRegression \n", + "14 sklearn.neural_network._multilayer_perceptron.... 
\n", + "15 sklearn.naive_bayes.MultinomialNB \n", "16 sklearn.ensemble.HistGradientBoostingClassifier \n", "17 sklearn.ensemble.HistGradientBoostingClassifier \n", "18 sklearn.ensemble.HistGradientBoostingClassifier \n", @@ -4096,12 +3887,6 @@ "32 sklearn.ensemble.HistGradientBoostingClassifier \n", "33 sklearn.ensemble.HistGradientBoostingClassifier \n", "34 sklearn.ensemble.HistGradientBoostingClassifier \n", - "35 sklearn.ensemble.HistGradientBoostingClassifier \n", - "36 sklearn.ensemble.HistGradientBoostingClassifier \n", - "37 sklearn.ensemble.HistGradientBoostingClassifier \n", - "38 sklearn.ensemble.HistGradientBoostingClassifier \n", - "39 sklearn.ensemble.HistGradientBoostingClassifier \n", - "40 sklearn.ensemble.HistGradientBoostingClassifier \n", "\n", " learner_hps \n", "0 None \n", @@ -4135,16 +3920,10 @@ "28 None \n", "29 None \n", "30 None \n", - "31 {'early_stop': 'valid', 'l2_regularization': 0... \n", - "32 {'early_stop': 'valid', 'l2_regularization': 3... \n", - "33 {'early_stop': 'off', 'l2_regularization': 4.9... \n", - "34 {'early_stop': 'off', 'l2_regularization': 0.2... \n", - "35 {'early_stop': 'train', 'l2_regularization': 0... \n", - "36 {'early_stop': 'valid', 'l2_regularization': 9... \n", - "37 {'early_stop': 'train', 'l2_regularization': 0... \n", - "38 {'early_stop': 'valid', 'l2_regularization': 0... \n", - "39 {'early_stop': 'off', 'l2_regularization': 8.3... \n", - "40 {'early_stop': 'off', 'l2_regularization': 0.1... " + "31 None \n", + "32 {'early_stop': 'valid', 'l2_regularization': 6... \n", + "33 {'early_stop': 'off', 'l2_regularization': 1.2... \n", + "34 {'early_stop': 'off', 'l2_regularization': 0.3... " ] }, "execution_count": 13, @@ -4164,7 +3943,7 @@ "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -4194,7 +3973,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.12.5" } }, "nbformat": 4,