-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrid_search.py
38 lines (25 loc) · 880 Bytes
/
grid_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from constants import NP_DATASET_KMEANS_PATH
from pipelines import preprocessing_pipeline
from utils import get_dataset
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import numpy as np
import joblib
# Full model: the project's preprocessing pipeline followed by an SVC created
# with default arguments. The step name 'svm' is the prefix that the 'svm__C'
# key of the parameter grid refers to.
# `steps` is given as a list of (name, estimator) pairs, which is the form the
# scikit-learn Pipeline API documents.
pipeline = Pipeline(steps=[
    ('preprocessing', preprocessing_pipeline),
    ('svm', SVC()),
])
# Candidate values for the SVC parameter C: the integers 1..1099 divided by
# 1000, i.e. 0.001 through 1.099.
Cs = np.divide(np.arange(1, 1100), 1000)

# Grid handed to GridSearchCV; the 'svm__' prefix targets the pipeline step
# named 'svm'.
svm_params = dict(svm__C=Cs)

# File name under which the best estimator is saved.
model_path = "svm__30l_10ol__median__min_max_scaler__kmeans.pkl"

# Metrics passed to the grid search as its `scoring` argument.
scoring = ["accuracy", "f1_macro", "f1_micro"]
if __name__ == '__main__':
    # Load the dataset stored at NP_DATASET_KMEANS_PATH.
    # NOTE(review): X / y are presumably the feature matrix and labels --
    # confirm against utils.get_dataset.
    X, y = get_dataset(NP_DATASET_KMEANS_PATH)

    # Grid search over `svm_params` with 5-fold cross-validation on all
    # available cores. Every metric in `scoring` is computed, and
    # refit='f1_micro' makes f1_micro the metric that picks the winner,
    # which is then refit and exposed as best_estimator_.
    gscv = GridSearchCV(
        pipeline,
        svm_params,
        scoring=scoring,
        cv=5,
        n_jobs=-1,
        refit='f1_micro',
        verbose=3,
    )
    gscv.fit(X, y)

    # With multi-metric scoring, best_score_ refers to the refit metric
    # (f1_micro here).
    print(f"Best score: {gscv.best_score_}")

    # Persist the refit best pipeline so it can be reloaded with joblib.load.
    best_model = gscv.best_estimator_
    joblib.dump(best_model, model_path)