-
Notifications
You must be signed in to change notification settings - Fork 0
/
baselines.py
77 lines (57 loc) · 2.73 KB
/
baselines.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
"""Baseline scores for ML pipelines. Currently includes the best dummy-estimator score for classifiers and regressors."""
# ----------------------------------------------------------------------------------------------------------------------------
# LOGGING
# ----------------------------------------------------------------------------------------------------------------------------
import logging

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Import-with-fallback loop: try the imports; on failure, log the error, run the auto-install
# helper and try again. The loop is left only via `break` (all imports succeeded) or via the
# re-raise below.
# NOTE(review): there is no attempt limit — if ensure_installed returns without making the
# imports succeed (and without raising), this loop repeats indefinitely.
while True:
    try:
        # ----------------------------------------------------------------------------------------------------------------------------
        # Normal Imports
        # ----------------------------------------------------------------------------------------------------------------------------
        from typing import *
        import pandas as pd, numpy as np
        from sklearn.base import is_classifier, is_regressor
        from sklearn.dummy import DummyClassifier, DummyRegressor
    except Exception as e:
        logger.warning(e)
        # Errors whose message contains "cannot import name" are re-raised instead of retried —
        # presumably because the package is already present, so installing would not help.
        if "cannot import name" in str(e):
            raise (e)
        # ----------------------------------------------------------------------------------------------------------------------------
        # Packages auto-install
        # ----------------------------------------------------------------------------------------------------------------------------
        # pyutilz is imported only on this failure path.
        from pyutilz.pythonlib import ensure_installed
        # Presumably installs any of the listed packages that are missing — confirm against pyutilz.
        ensure_installed("numpy pandas scikit-learn")
    else:
        # Every import succeeded; stop retrying.
        break
# Large finite magnitude; its negation is the starting (worst possible) value of the running
# best score in get_best_dummy_score, and is what that function returns when nothing beats it.
LARGE_CONST: float = 1e30
def get_best_dummy_score(
    estimator: object,
    X_train: Union[pd.DataFrame, np.ndarray],
    y_train: Union[pd.DataFrame, np.ndarray, pd.Series],
    X_test: Union[pd.DataFrame, np.ndarray],
    y_test: Union[pd.DataFrame, np.ndarray, pd.Series],
    scoring: object,
    verbose: bool = False,
) -> float:
    """Return the highest test score achieved by a dummy baseline of the estimator's kind.

    The estimator is only inspected (via ``is_classifier`` / ``is_regressor``) to pick the
    baseline family; it is never fitted or scored here. Every candidate strategy of the matching
    dummy model is fitted on the train set and scored on the test set.

    Args:
        estimator: Object whose kind (classifier or regressor) selects the dummy model family.
        X_train: Features used to fit each dummy model.
        y_train: Targets used to fit each dummy model.
        X_test: Features passed to ``scoring``.
        y_test: Targets passed to ``scoring``.
        scoring: Callable invoked as ``scoring(model, X_test, y_test)``; larger results are
            treated as better.
        verbose: When true, log each strategy and its score at INFO level.

    Returns:
        The largest score among the tried strategies, or ``-LARGE_CONST`` when the estimator is
        neither a classifier nor a regressor (an error is logged) or no score exceeds that floor.
    """
    # Choose the baseline family and the strategies to try for it.
    if is_classifier(estimator):
        baseline_cls = DummyClassifier
        candidate_strategies = ("most_frequent", "prior", "stratified", "uniform")
    elif is_regressor(estimator):
        baseline_cls = DummyRegressor
        candidate_strategies = ("mean", "median")
    else:
        # Unsupported estimator kind: report it and hand back the floor value.
        logger.error(f"Unexpected estimator type in get_best_dummy_score: {estimator}")
        return -LARGE_CONST

    top_score = -LARGE_CONST
    for strategy_name in candidate_strategies:
        # NOTE(review): only `strategy` is passed to the dummy model, so no random_state is set.
        baseline = baseline_cls(strategy=strategy_name)
        baseline.fit(X=X_train, y=y_train)
        current_score = scoring(baseline, X_test, y_test)

        if verbose:
            logger.info(f"strategy={strategy_name}, score={current_score:.6f}")

        # Strict comparison: ties keep the earlier strategy's score.
        if current_score > top_score:
            top_score = current_score

    return top_score