-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathselect_features.py
50 lines (42 loc) · 1.98 KB
/
select_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import pandas as pd
from ncafs import NCAFSC
from sklearn.model_selection import LeaveOneOut
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
# Classifiers used to score the features selected on each fold. The instances
# live at module level and are refitted in place every time they are used.
models = [
    KNeighborsClassifier(n_neighbors=3),
    SVC(),
    DecisionTreeClassifier(),
]
def select_features(X: pd.DataFrame, y: np.ndarray, frequency_bands: dict):
    """Select, per frequency band, the features NCA picks most consistently.

    For every band a leave-one-out loop is run over the band's samples. On
    each fold the NCA selector is fitted on the training samples, its boolean
    support is recorded, and every classifier in the module-level ``models``
    list is fitted and scored on the held-out sample (results are printed).
    A feature is kept for the band when it was selected in at least half of
    ``n_subjects`` folds; if fewer than 5 features qualify, the most
    frequently selected remaining ones are added until 5 are kept.

    Args:
        X: DataFrame whose cell ``X.loc[0, band]`` holds the data of one
            band as a 2-D array indexed ``[sample, feature]``. The number of
            samples and features is read from ``X.iloc[0, 0]`` and reused for
            every band, so all bands are assumed to share that shape.
        y: Labels, indexable by the integer index arrays produced by
            ``LeaveOneOut.split``.
        frequency_bands: Mapping whose keys are the band labels (columns of
            ``X``). Only the keys are used.

    Returns:
        A dict mapping each band label to a boolean array of length
        ``n_features_band`` flagging the retained features.

    Side effects:
        Prints fold numbers, model names and scores to stdout, and refits the
        shared ``models`` instances in place.
    """
    # Single source of truth for the number of features requested from the
    # selector and guaranteed in the final mask.
    n_features_to_keep = 5

    n_features_band = X.iloc[0, 0].shape[1]
    n_subjects = X.iloc[0, 0].shape[0]
    loo = LeaveOneOut()
    # NOTE(review): n_splits=19 is hard-coded; confirm it suits the number of
    # training samples available on each leave-one-out fold.
    nca = NCAFSC(fit_method='average', n_splits=19,
                 standardize=True, n_features_to_select=n_features_to_keep)
    support_band = {}

    # For each frequency band, run a leave-one-out loop.
    for band in frequency_bands:
        band_data = X.loc[0, band]
        # One row per fold, one column per feature: 1 where selected.
        support = np.zeros((n_subjects, n_features_band))

        for i, (train_index, test_index) in enumerate(loo.split(band_data)):
            # Select features with NCA on the training samples only.
            nca.fit(band_data[train_index], y[train_index])
            support[i, :] = nca.support_

            # Evaluate the selected features on the held-out sample.
            X2 = nca.transform(band_data)
            print('Fold ' + str(i))
            for model in models:
                model.fit(X2[train_index], y[train_index])
                print('Model: ' + type(model).__name__)
                print('Score: ' + str(model.score(X2[test_index],
                                                  y[test_index])))

        # Keep the features that appear in at least 50% of the folds.
        count_support = support.sum(axis=0)
        count_support_sup = count_support >= n_subjects / 2

        # If fewer than the required number of features qualify, add the next
        # most frequently selected ones. Every qualifying feature has a count
        # at least as high as any non-qualifying one, so the qualifying set
        # occupies the first `count_features` slots of the descending ranking
        # and the slice below contains only new features. The ranking is
        # computed once, and slicing (unlike per-index access) does not raise
        # when the band has fewer features than requested.
        count_features = int(count_support_sup.sum())
        if count_features < n_features_to_keep:
            ranked = np.argsort(count_support)[::-1]
            count_support_sup[ranked[count_features:n_features_to_keep]] = True

        support_band[band] = count_support_sup

    return support_band