-
Notifications
You must be signed in to change notification settings - Fork 2
/
MLP.py
75 lines (62 loc) · 2.62 KB
/
MLP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
"""
Classifier - MLP
Classify images from different malware families with an MLP (multi-layer perceptron) for the public Kaggle dataset (https://www.kaggle.com/c/malware-classification/overview)
Author: Benoît Michel
Date : June 2021
"""
#from numpy.random import seed
#seed(1)
#from tensorflow import set_random_seed
#set_random_seed(2)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import keras
import seaborn as sns
from keras.models import Sequential, Input, Model
from keras.layers import Dense, Dropout, Flatten, Softmax
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn import metrics
import time
# Root directory of the image dataset, read by flow_from_directory below.
path_root = """\\Documents\\Unif\\MQ4\\LINGI2990_TFE\\Code\\Images_Test_TrueRGB\\""" # TO ADAPT ACCORDING TO YOUR DATASET LOCATION

# A single very large batch is requested so that one next() call returns the
# images as in-memory arrays.
# NOTE(review): 10868 is presumably the number of images in the dataset; a
# larger dataset would be truncated to this many images -- adapt if needed.
batches = ImageDataGenerator().flow_from_directory(directory=path_root, target_size=(224,224), batch_size=10868)
#print(batches.class_indices)
imgs, labels = next(batches)
print(imgs.shape)
print(labels.shape)

# Class names discovered by the generator, and the share (in percent) of the
# loaded images that belongs to each class. Summing the one-hot label rows
# along axis 0 gives the per-class image counts.
classes = batches.class_indices.keys()
perc = (labels.sum(axis=0)/labels.shape[0])*100
#plt.xticks(rotation='vertical')
#plt.bar(classes,perc)
#plt.show()

# Number of malware families, taken from the classes the generator found so
# that it always matches the width of the one-hot labels.
num_classes = len(batches.class_indices)
def malware_model(i, input_shape=(224, 224, 3)):
    """Build and compile the fully connected (MLP) malware classifier.

    Each image is flattened and passed through two ReLU hidden layers
    (1024 and 512 units) followed by a softmax layer with ``num_classes``
    outputs. The model is compiled with categorical cross-entropy, the
    Adam optimizer and the accuracy metric.

    Args:
        i: Value of the parameter under test for the current run. It is
            only printed, so that runs can be told apart in the output.
        input_shape: Shape of one input image, without the batch axis.
            The default matches the 224x224 images loaded by this script,
            assuming 3 colour channels (TODO confirm if the loader's
            colour mode is changed).

    Returns:
        The compiled ``Sequential`` model.
    """
    print(i)
    Malware_model = Sequential()
    # The input shape has to be declared on the first layer: on a later
    # layer Keras does not use it. Declaring it here also builds the model
    # right away, so summary() can be called before fit().
    Malware_model.add(Flatten(input_shape=input_shape))
    Malware_model.add(Dense(1024, activation='relu'))
    #Malware_model.add(Dense(1024, activation='relu'))
    #Malware_model.add(Dropout(0.1))
    Malware_model.add(Dense(512, activation='relu'))
    Malware_model.add(Dense(num_classes, activation='softmax'))
    Malware_model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
    return Malware_model
# Accuracy obtained by each run, in the order the runs were executed.
accuracies = []
for i in [1]: # CHANGE LIST FOR TESTS ON PARAMETERS
    # A new random 70/30 split is drawn for every run; pixel values are
    # divided by 255 before training.
    X_train, X_test, y_train, y_test = train_test_split(imgs/255., labels, test_size=0.3)
    # perf_counter() is monotonic, so the measured duration is not affected
    # by system clock adjustments. The timing covers model construction,
    # training and evaluation.
    start = time.perf_counter()
    Malware_model = malware_model(i)
    #Malware_model.summary()
    # Stop training once the validation loss has not improved for 2 epochs.
    # NOTE(review): the same held-out split drives early stopping and
    # produces the reported score, so the reported accuracy is likely
    # optimistic -- consider a separate validation split.
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)
    Malware_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, callbacks=[es])
    # evaluate() returns the loss followed by the metrics the model was
    # compiled with, so index 1 is the first compiled metric.
    scores = Malware_model.evaluate(X_test, y_test, verbose=1)
    end = time.perf_counter()
    print(end-start)
    print('Final MLP accuracy: ', scores[1])
    accuracies.append(scores[1])
print("Accuracies", accuracies)