-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathmodel.py
117 lines (97 loc) · 3.62 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# %%
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
import pathlib
np.random.seed(27)
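# Put the directory two levels above this file's folder on sys.path so the
# project-level module generate_segment_trajectories can be imported below.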
sys.path.insert(0, str(pathlib.Path(__file__).parent.parent.parent.absolute()))
from generate_segment_trajectories import get_basic_dataframe
def get_data(
    normal_folders, anomalous_folders, agent_map_folder="../agent_maps", test_ratio=0.3
):
    """Load normal and anomalous frames and split them into train/test sets.

    Both folder groups are loaded with ``get_basic_dataframe`` (called with
    ``max_agents=1000``), each result is concatenated into one frame, and only
    the min/max velocity, angular-velocity and acceleration columns are kept.
    Rows from the normal group are labelled 0 and rows from the anomalous
    group are labelled 1.

    Args:
        normal_folders: Passed as ``subfolders`` when loading the normal class.
        anomalous_folders: Passed as ``subfolders`` when loading the anomalous
            class.
        agent_map_folder: Forwarded unchanged to ``get_basic_dataframe``.
        test_ratio: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        Tuple: X_train, X_test, y_train, y_test produced by
        ``train_test_split`` with ``random_state=42``.
    """
    # Normal is loaded first, then anomalous, mirroring the label order below.
    loaded = [
        get_basic_dataframe(
            subfolders=folders, agent_map_folder=agent_map_folder, max_agents=1000
        )
        for folders in (normal_folders, anomalous_folders)
    ]
    print("Got all dataframes")

    # Whatever get_basic_dataframe returns is handed straight to pd.concat.
    normal_df = pd.concat(loaded[0])
    anomalous_df = pd.concat(loaded[1])
    print(
        f"Nominal frame count: {normal_df.shape}\nAnomalous frame count:{anomalous_df.shape}"
    )

    # Column order matters to the downstream models, so the groups are listed
    # in the exact order the feature columns must appear.
    feature_groups = (
        "max_velocity",
        "max_ang_velocity",
        "min_velocity",
        "min_ang_velocity",
        "max_acc",
        "min_acc",
    )
    feature_cols = [f"{group}_{axis}" for group in feature_groups for axis in "xyz"]

    features = pd.concat([frame[feature_cols] for frame in (normal_df, anomalous_df)])
    labels = np.concatenate(
        [np.zeros(normal_df.shape[0]), np.ones(anomalous_df.shape[0])]
    )

    # train_test_split returns a list; callers receive a tuple.
    return tuple(
        train_test_split(features, labels, test_size=test_ratio, random_state=42)
    )
def build_randomforest_model(
    normal_folders, anomalous_folders, agent_map_folder, data=None
):
    """Fit a random forest classifier and score it on the test split.

    Args:
        normal_folders: Forwarded to ``get_data``; only used when ``data`` is
            None.
        anomalous_folders: Forwarded to ``get_data``; only used when ``data``
            is None.
        agent_map_folder: Forwarded to ``get_data``; only used when ``data``
            is None.
        data: Optional ``(X_train, X_test, y_train, y_test)`` sequence. When
            given, it is used as-is and ``get_data`` is not called.

    Returns:
        Tuple: the fitted ``RandomForestClassifier`` and the value of its
        ``score`` method on ``(X_test, y_test)``.
    """
    if data is None:
        data = get_data(normal_folders, anomalous_folders, agent_map_folder)
    X_train, X_test, y_train, y_test = data

    model = RandomForestClassifier(n_estimators=1000, max_depth=10, random_state=0)
    model.fit(X_train, y_train)

    # score() predicts on X_test internally, so no separate predict() call is
    # made here; its result would only be thrown away.
    return model, model.score(X_test, y_test)
def build_mlp_classifier(normal_folders, anomalous_folders, agent_map_folder, data=None):
    """Fit an MLP classifier and score it on the test split.

    Args:
        normal_folders: Forwarded to ``get_data``; only used when ``data`` is
            None.
        anomalous_folders: Forwarded to ``get_data``; only used when ``data``
            is None.
        agent_map_folder: Forwarded to ``get_data``; only used when ``data``
            is None.
        data: Optional ``(X_train, X_test, y_train, y_test)`` sequence, as
            accepted by ``build_randomforest_model``. When given, it is used
            as-is and ``get_data`` is not called, so both models can be
            trained on the same split without reloading the data.

    Returns:
        Tuple: the fitted ``MLPClassifier`` and the value of its ``score``
        method on ``(X_test, y_test)``.
    """
    if data is None:
        data = get_data(normal_folders, anomalous_folders, agent_map_folder)
    X_train, X_test, y_train, y_test = data

    model = MLPClassifier(
        solver="lbfgs", alpha=1e-5, hidden_layer_sizes=(150, 10), random_state=1
    )
    model.fit(X_train, y_train)

    # score() predicts on X_test internally, so no separate predict() call is
    # made here; its result would only be thrown away.
    return model, model.score(X_test, y_test)
def show_permutation_imp(RF, X, y):
    """Show a box plot of permutation importances for a fitted model.

    The importances are computed on whatever ``X``/``y`` pair is passed in, so
    the caller chooses between the train and the test set by passing the
    corresponding split.

    Args:
        RF: Fitted estimator handed to ``permutation_importance``.
        X: Feature table to evaluate on. It must expose ``.columns`` (for
            example a pandas DataFrame) because the column names are used as
            the box labels.
        y: Targets corresponding to the rows of ``X``.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    result = permutation_importance(RF, X, y, n_repeats=10, random_state=42, n_jobs=2)

    # Order features by ascending mean importance across the repeats.
    sorted_idx = result.importances_mean.argsort()

    fig, ax = plt.subplots()
    # Transposed so that each feature contributes one box built from its
    # per-repeat importances.
    ax.boxplot(
        result.importances[sorted_idx].T, vert=False, labels=X.columns[sorted_idx]
    )
    ax.set_title("Permutation Importances")
    fig.tight_layout()
    plt.show()