-
Notifications
You must be signed in to change notification settings - Fork 7
/
utils.py
61 lines (42 loc) · 1.9 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold
from joblib import Parallel, delayed
from tqdm.notebook import tqdm
def selecionar_melhor_k_knn(ks, X_treino, X_val, y_treino, y_val, n_jobs=4):
    """Choose the best ``k`` for a KNN classifier on a validation split.

    One ``KNeighborsClassifier`` per candidate in ``ks`` is fitted on the
    training split and scored by accuracy on the validation split; the
    candidates are evaluated in parallel through joblib. A final classifier
    with the winning ``k`` is then fitted on the training and validation
    data stacked together.

    Args:
        ks: Iterable of candidate ``n_neighbors`` values. It is materialised
            into a list, so one-shot iterables (generators) are accepted.
        X_treino: Training features; must be row-stackable with ``X_val``
            via ``np.vstack``.
        X_val: Validation features.
        y_treino: Training labels.
        y_val: Validation labels.
        n_jobs: Number of joblib workers used to evaluate the candidates.
            Defaults to 4, the value that used to be hard-coded.

    Returns:
        A tuple ``(knn, melhor_k, melhor_val)``: the classifier refitted on
        train + validation data, the selected ``k``, and the validation
        accuracy that ``k`` achieved.

    Raises:
        ValueError: If ``ks`` contains no candidates.
    """
    # Materialise once: the candidates are iterated by Parallel and then
    # indexed again to recover the winner.
    ks = list(ks)
    if not ks:
        raise ValueError("ks must contain at least one candidate value of k")

    def treinar_knn(k, X_treino, X_val, y_treino, y_val):
        # Fit on the training split only and report validation accuracy.
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_treino, y_treino)
        pred = knn.predict(X_val)
        return accuracy_score(y_val, pred)

    acuracias_val = Parallel(n_jobs=n_jobs)(
        delayed(treinar_knn)(k, X_treino, X_val, y_treino, y_val) for k in ks
    )

    # np.argmax returns the first maximum, so the earliest candidate wins ties.
    melhor_idx = int(np.argmax(acuracias_val))
    melhor_k = ks[melhor_idx]
    melhor_val = acuracias_val[melhor_idx]

    # Refit with the selected k on every labelled row available here.
    knn = KNeighborsClassifier(n_neighbors=melhor_k)
    knn.fit(np.vstack((X_treino, X_val)), np.concatenate((y_treino, y_val)))

    return knn, melhor_k, melhor_val
def do_cv_knn(X, y, cv_splits, ks):
    """Estimate KNN accuracy with stratified k-fold cross-validation.

    For each outer fold, 20% of the fold's training rows are split off
    (stratified) as a validation set. A ``StandardScaler`` is fitted on the
    remaining training rows only and applied to the training, validation
    and test rows. ``selecionar_melhor_k_knn`` then picks ``k`` on the
    validation set, and the classifier it returns is scored on the fold's
    test rows.

    Args:
        X: Feature matrix; converted with ``np.asarray`` so rows are always
            selected positionally.
        y: Labels aligned with the rows of ``X``; also converted with
            ``np.asarray``.
        cv_splits: Number of stratified folds.
        ks: Candidate ``n_neighbors`` values forwarded to
            ``selecionar_melhor_k_knn``.

    Returns:
        A list with one test accuracy per fold, in fold order.
    """
    # Coerce to arrays so lists and pandas objects are indexed by position.
    X = np.asarray(X)
    y = np.asarray(y)

    skf = StratifiedKFold(n_splits=cv_splits, shuffle=True, random_state=1)
    acuracias = []

    # Context manager guarantees the progress bar is closed even if a fold fails.
    with tqdm(total=cv_splits, desc='Folds avaliados') as pgb:
        for treino_idx, teste_idx in skf.split(X, y):
            X_treino = X[treino_idx]
            y_treino = y[treino_idx]
            X_teste = X[teste_idx]
            y_teste = y[teste_idx]

            # Hold out part of the training rows to select k.
            X_treino, X_val, y_treino, y_val = train_test_split(
                X_treino,
                y_treino,
                stratify=y_treino,
                test_size=0.2,
                random_state=1,
            )

            # Fit the scaler on training rows only so validation and test
            # statistics do not leak into the transformation.
            ss = StandardScaler()
            ss.fit(X_treino)
            X_treino = ss.transform(X_treino)
            X_teste = ss.transform(X_teste)
            X_val = ss.transform(X_val)

            knn, _, _ = selecionar_melhor_k_knn(ks, X_treino, X_val, y_treino, y_val)
            pred = knn.predict(X_teste)
            acuracias.append(accuracy_score(y_teste, pred))

            pgb.update(1)

    return acuracias