-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmodule6_catability.py
122 lines (96 loc) · 3.07 KB
/
module6_catability.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import numpy as np
import pandas as pd
from lime import lime_tabular
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import utils
# Load the tracks table through the project helper.
# NOTE(review): presumably a DataFrame with two-level column labels such as
# ("track", "duration") and ("album", "type") -- confirm in utils.
df = utils.load_small_tracks(buckets="discrete")

# Disabled experiment: collapse the album type into a binary target
# (Album vs. NotAlbum). Kept for reference only.
# df["album", "type"] = df["album", "type"].replace(
#     ["Single Tracks", "Live Performance", "Radio Program"],
#     ["NotAlbum", "NotAlbum", "NotAlbum"],
# )

# Integer-encode the bucketed track columns; the fitted encoders are kept so
# the original bucket labels can be recovered with inverse_transform later.
# The target column ("album", "type") is intentionally not encoded.
label_encoders = {}
column2encode = [
    ("track", "duration"),
    ("track", "interest"),
    ("track", "listens"),
]
for col in column2encode:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le
df.info()

class_name = ("album", "type")

# Carry the row index along as the LAST feature column so that rows of the
# shuffled train/test arrays can be traced back to their original records.
# NOTE(review): this also makes the index a feature seen by the classifier.
df["index-cat"] = df.index
attributes = [col for col in df.columns if col != class_name]

X = df[attributes].values
y = df[class_name]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

clf = RandomForestClassifier(
    n_estimators=100,
    criterion="gini",
    max_depth=17,
    min_samples_split=3,
    min_samples_leaf=3,
    # "auto" was deprecated in scikit-learn 1.1 and removed in 1.3; for a
    # random forest classifier it meant "sqrt", so this keeps the same model.
    max_features="sqrt",
    random_state=10,
    class_weight="balanced",
)
clf.fit(X_train, y_train)

# Mean accuracy of the fitted forest on the held-out split.
score = clf.score(X_test, y_test)
def bb_predict(X):
    """Return the labels that the module-level classifier ``clf`` predicts for ``X``."""
    predicted_labels = clf.predict(X)
    return predicted_labels
def bb_predict_proba(X):
    """Return the class probabilities that the module-level ``clf`` assigns to ``X``."""
    class_probabilities = clf.predict_proba(X)
    return class_probabilities
def _feature_label(column):
    """Return a flat, printable name for a (possibly multi-level) column label."""
    if isinstance(column, tuple):
        # Skip empty levels, e.g. the padding level of a single-name column.
        return ".".join(str(part) for part in column if str(part))
    return str(column)


# Accuracy of the black-box classifier on the held-out split.
y_pred = bb_predict(X_test)
print("Accuracy %.3f" % accuracy_score(y_test, y_pred))

# predict_proba yields one column per entry of clf.classes_, in that order,
# so the names handed to LIME must follow the same order (a hand-written list
# silently mislabels the explanation when its order differs).
class_names = [str(label) for label in clf.classes_]

# feature_names must describe exactly the columns of the training matrix,
# i.e. `attributes` (every column except the target), not all of df.columns.
explainer = lime_tabular.LimeTabularExplainer(
    training_data=np.array(X_train),
    feature_names=[_feature_label(col) for col in attributes],
    class_names=class_names,
    mode="classification",
)

# Rebuild the test split as a DataFrame for inspection: the trailing
# "index-cat" column becomes the row index and is dropped from the features.
# Column labels are selected by excluding the target rather than by position,
# so this does not depend on where the target sits in df.
feature_columns = df.columns[
    np.array([col != class_name for col in df.columns])
]
X_test = pd.DataFrame(
    X_test[:, :-1], columns=feature_columns[:-1], index=X_test[:, -1]
)
X_test.info()  # info() prints by itself; wrapping it in print() adds "None"

single_Tracks = df[df[class_name] == "Single Tracks"].head()
print(single_Tracks)
records = single_Tracks

# Hand-picked record ids; only those that actually landed in the test split
# are shown (a plain .loc lookup raises KeyError on any missing id). Rows
# come back in test-split order, not in the order listed here.
wanted_ids = [135, 136, 155, 169, 170]
prova = X_test[X_test.index.isin(wanted_ids)]
print(prova)

# Explain each selected record. Iterating a DataFrame directly yields column
# labels, so rows are walked with iterrows(); only the feature columns are
# kept so the vector has the layout the classifier was trained on.
# NOTE(review): explain_instance defaults to labels=(1,), i.e. it explains
# the class at index 1 of clf.classes_ -- pass labels/top_labels if another
# class is wanted.
for record_id, record in records[attributes].iterrows():
    print(record)
    x = record.to_numpy(dtype=float)
    exp = explainer.explain_instance(data_row=x, predict_fn=bb_predict_proba)
    print(exp.local_exp)
    # One file per record; a single fixed name kept only the last explanation.
    exp.save_to_file("porco_%s.html" % record_id)