-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathfirst_it_plots.py
57 lines (47 loc) · 2.09 KB
/
first_it_plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pandas as pd
import statsmodels.api as sm
import seaborn as sns
from matplotlib import pyplot as plt
from src.preprocess import read_data
from src.visualization import plot_distribution, plot_qqplot, plot_correlation_matrix, plot_confusion_matrix, plot_feature_importance
from sklearn.model_selection import train_test_split
from src.model import train
from sklearn.metrics import classification_report
# Load the feature table X and the target y from the CSV file.
X, y = read_data("./data/data.csv")

# Draw one Q-Q plot per feature column.
# To export the figures as .svg, comment out plt.show() inside plot_qqplot
# and enable the plt.savefig line below.
for feature in X.columns:
    plot_qqplot(X[feature])
    # plt.savefig("qq_plot_" + str(feature) + ".svg", bbox_inches='tight')
    plt.clf()  # reset the current figure before the next feature is drawn
# Draw one distribution plot per feature column.
# To export the figures as .svg, comment out plt.show() inside
# plot_distribution and enable the plt.savefig line below.
for feature in X.columns:
    plot_distribution(X[feature])
    # plt.savefig("distr_plot_" + str(feature) + ".svg", bbox_inches='tight')
    plt.clf()  # reset the current figure before the next feature is drawn
# Draw the correlation matrix over all feature columns.
# To export the figure as .svg, comment out plt.show() inside
# plot_correlation_matrix and enable the plt.savefig line below.
plot_correlation_matrix(X)
# plt.savefig("corr_matrix" + ".svg", bbox_inches='tight')
plt.clf()

# Print the mean of every feature column.
feature_means = X.mean()
print(feature_means)
# Hold out 30% of the samples for evaluation. The split is stratified on y
# and uses a fixed seed so that it is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    stratify=y,
    random_state=42,
)

# Fit the model on the training part and predict the held-out part.
rf = train(X_train, y_train)
pred = rf.predict(X_test)

# Report the per-class metrics of the held-out predictions.
print(classification_report(y_test, pred))
# Draw the confusion matrix of the held-out predictions for labels 0 and 1.
# To export the figure as .svg, comment out plt.show() inside
# plot_confusion_matrix and enable the plt.savefig line below.
class_labels = [0, 1]
plot_confusion_matrix(y_test, pred, class_labels)
# plt.savefig("conf_matrix" + ".svg", bbox_inches='tight')
plt.clf()
# Rank the features by the importance stored on the fitted "rf" component of
# the trained model, most important first.
# NOTE(review): presumably train() returns a scikit-learn Pipeline whose
# "rf" step is a random forest - confirm against src.model.
importances = rf["rf"].feature_importances_
feature_scores = pd.Series(importances, index=X_train.columns).sort_values(
    ascending=False
)
print(feature_scores)

# Plot the ranked feature importances.
# To export the figure as .svg, comment out plt.show() inside
# plot_feature_importance and enable the plt.savefig line below.
plot_feature_importance(feature_scores)
# plt.savefig("feature_imp" + ".svg", bbox_inches='tight')
plt.clf()