-
Notifications
You must be signed in to change notification settings - Fork 3
/
binary2image.py
284 lines (231 loc) · 7.96 KB
/
binary2image.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
"""
Binary to Image Converter
Read executable binary files and convert them to RGB and greyscale PNG images
Author: Benoît Michel inspired from https://github.com/ncarkaci/binary-to-image/blob/master/binary2image.py
Date : June 2021
"""
import os, math
import argparse
from PIL import Image
from queue import Queue
from threading import Thread
import time
import pefile
def getBinaryData(filename):
    """
    Extract byte values from binary executable file and store them into list.

    The file is read with a single call instead of one ``read(1)`` per byte;
    the returned list is the same.

    :param filename: executable file name
    :return: byte value list (one int in range 0..255 per byte of the file)
    """
    with open(filename, 'rb') as fileobject:
        # Iterating over a bytes object yields the integer value of each byte.
        return list(fileobject.read())
def createGreyScaleImage(filename, width=None):
    """
    Render the bytes of a file as a greyscale image, one byte per pixel.

    The byte list is handed to save_file with 'L' as both the image type and
    the output subfolder name.

    :param filename: path of the binary file to visualise
    :param width: value forwarded to get_size together with the byte count
    """
    pixels = getBinaryData(filename)
    dimensions = get_size(len(pixels), width)
    save_file(filename, pixels, dimensions, 'L', 'L')
def createRGBImage(filename, width=None):
    """
    Create RGB image from 24 bit binary data: 8 bit Red, 8 bit Green, 8 bit Blue.

    Consecutive groups of three bytes become one (R, G, B) pixel. Trailing
    bytes that do not form a complete triple are ignored.

    :param filename: executable file name
    :param width: value forwarded to get_size together with the pixel count
    :return: None
    """
    # Read binary file
    binary_data = getBinaryData(filename)

    # Only whole triples can become pixels. The previous loop condition
    # ``index + 3 < len`` also discarded the last *complete* triple whenever
    # the byte count was an exact multiple of three.
    usable_length = len(binary_data) - len(binary_data) % 3
    rgb_data = [
        tuple(binary_data[index:index + 3])
        for index in range(0, usable_length, 3)
    ]

    size = get_size(len(rgb_data), width)
    save_file(filename, rgb_data, size, 'RGB', 'RGB')
def createTrueRGBImage(filename, width):
    """
    Create an RGB image with one pixel per 256-byte sliding window of the file.

    Channel Red   = entropy term computed over the whole 256-byte window
    Channel Green = raw value of the first byte of the window
    Channel Blue  = entropy term computed over the first 8 bytes of the window

    :param filename: executable file name
    :param width: value forwarded to get_size together with the pixel count
    """
    pixels = list()
    file_bytes = getBinaryData(filename)

    # A window starting at ``start`` is used only while start + 256 < len.
    for start in range(len(file_bytes) - 256):
        counts = {}

        # Byte frequencies of the first 8 bytes of the window.
        for byte in file_bytes[start:start + 8]:
            counts[byte] = counts.get(byte, 0) + 1
        local_entropy = 0
        for byte in counts:
            fraction = counts[byte] / 256
            local_entropy += fraction * math.log(fraction, 2)

        # Extend the same table with the remaining 248 bytes of the window.
        for byte in file_bytes[start + 8:start + 256]:
            counts[byte] = counts.get(byte, 0) + 1
        window_entropy = 0
        for byte in counts:
            fraction = counts[byte] / 256
            window_entropy += fraction * math.log(fraction, 2)

        red = -int(window_entropy * 31.875)  # 255/8
        green = file_bytes[start]
        blue = -int(local_entropy * 31.875 * 85) - 423
        pixels.append((red, green, blue))

    dimensions = get_size(len(pixels), width)
    save_file(filename, pixels, dimensions, 'RGB', 'TrueRGB')
def createDisassembledImage(filename, width):
    """
    Create a greyscale image mixing entropy, raw bytes and PE section sizes.

    For every 256-byte sliding window of the file three components are computed:
      R = entropy of the window scaled by 255/8
      G = raw value of the first byte of the window
      B = (scaled size of the current PE section / file size) * 255
    and combined into a single grey value 0.299*R + 0.587*G + 0.114*B.
    The result is saved through save_file as an 'L' image in the
    'Disassembled' subfolder.

    Files that cannot be parsed as PE, or whose sections hold no raw data, are
    reported on stdout and skipped.

    :param filename: executable file name
    :param width: value forwarded to get_size together with the pixel count
    :return: None
    """
    binary_data = getBinaryData(filename)

    try:
        pe = pefile.PE(filename)
    except OSError as e:
        print(e)
        return  # without a parsed PE there are no sections to work with
    except pefile.PEFormatError as e:
        print("[-] PEFormatError: %s" % e.value)
        return

    try:
        raw_sizes = [section.SizeOfRawData for section in pe.sections]
    finally:
        # Release the file handle / memory map held by pefile.
        pe.close()

    total_raw_size = sum(raw_sizes)
    if total_raw_size == 0:
        print("[-] No raw section data in %s" % filename)
        return

    # Rescale the section sizes so that together they span the whole file.
    # Each entry is [bytes still to assign to this section, scaled section size].
    ratio = len(binary_data) / total_raw_size
    section_sizes = []
    for raw_size in raw_sizes:
        scaled_size = math.ceil(raw_size * ratio)
        section_sizes.append([scaled_size, scaled_size])

    greyscale_data = list()
    total_bytes = len(binary_data)
    current = 0  # index of the section the current window belongs to
    last_section = len(section_sizes) - 1

    # A window starting at ``index`` is used only while index + 256 < len.
    for index in range(total_bytes - 256):
        entropy = 0
        probs = {}
        for i in range(0, 256):
            key = binary_data[index + i]
            probs[key] = probs.get(key, 0) + 1
        for key in probs:
            entropy += probs[key] / 256 * math.log(probs[key] / 256, 2)

        # Move on to the next section once the current one is used up; this
        # also skips sections with no raw data. Stay on the last section if
        # everything is exhausted.
        while current < last_section and section_sizes[current][0] <= 0:
            current += 1
        section = section_sizes[current][1]
        section_sizes[current][0] -= 1

        R = -int(entropy * 31.875)  # 255/8
        G = binary_data[index]
        B = (section / total_bytes) * 255
        greyscale_data.append(0.299 * R + 0.587 * G + 0.114 * B)

    size = get_size(len(greyscale_data), width)
    save_file(filename, greyscale_data, size, 'L', 'Disassembled')
def save_file(filename, data, size, image_type, parent_name):
    """
    Save the data as a PNG image next to the executable, inside a subfolder named parent_name.

    The image is written to <directory of filename>/<parent_name>/<base name>_<image_type>.png;
    the subfolder is created if needed. Any error is printed and not re-raised,
    so one failing file does not stop a batch conversion.

    :param filename: executable file name
    :param data: the data of the image (sequence of pixel values)
    :param size: the total size of the image as (width, height)
    :param image_type: the PIL image mode, also appended to the image file name
    :param parent_name: the name of the subfolder where to save the image
    :return: None
    """
    try:
        image = Image.new(image_type, size)
        image.putdata(data)

        # setup output filename
        dirname = os.path.dirname(filename)
        name, _ = os.path.splitext(os.path.basename(filename))
        # os.path.join keeps the path relative when dirname is empty; plain
        # concatenation with os.sep produced a path at the filesystem root.
        imagename = os.path.join(dirname, parent_name, name + '_' + image_type + '.png')
        os.makedirs(os.path.dirname(imagename), exist_ok=True)

        image.save(imagename)
        print('The file', imagename, 'saved.')
    except Exception as err:
        print(err)
def get_size(data_length, width=None):
    """
    Return the dimensions of the image according to the data size of the file.

    When no width is given, it is chosen from the file-size table inspired by
    "Malware images: visualization and automatic classification" by L. Nataraj
    (http://dl.acm.org/citation.cfm?id=2016908). When a width is given it is
    used as is (it was previously ignored in favour of a square image).
    The height always leaves room for all data_length values.

    :param data_length: the number of values (bytes or pixels) to place in the image
    :param width: the desired width for the image, or None to choose it automatically
    :return: (width of the image, height of the image)
    :raises ValueError: if an explicit width is smaller than 1
    """
    if width is None:
        # (largest data length, inclusive) -> width; first matching row wins.
        width_table = (
            (10240 * 3, 64),
            (10240 * 6, 128),
            (10240 * 10, 256),
            (10240 * 20, 384),
            (10240 * 50, 512),
            (10240 * 100, 768),
        )
        if data_length < 10240:
            width = 32
        else:
            width = 1024
            for upper_bound, table_width in width_table:
                if data_length <= upper_bound:
                    width = table_width
                    break
    elif width < 1:
        raise ValueError("width must be a positive integer")

    height = data_length // width + 1
    return (width, height)
def run(file_queue, width):
    """
    For each executable file in the queue, create the different visualizations.

    Items are taken without blocking, so a worker returns as soon as the queue
    is drained even when another worker took the last item first. A failure in
    one visualization is printed and does not prevent the others; task_done()
    is always called so that Queue.join() in the caller cannot hang.

    :param file_queue: the queue with all the executable files to transform
    :param width: the desired width for the images
    :return: None
    """
    from queue import Empty  # local import: the module only imports Queue

    while True:
        try:
            filename = file_queue.get_nowait()
        except Empty:
            return

        try:
            converters = (
                createGreyScaleImage,
                createRGBImage,
                createTrueRGBImage,
                createDisassembledImage,
            )
            for convert in converters:
                try:
                    convert(filename, width)
                except Exception as err:
                    print("[-] %s failed for %s: %s" % (convert.__name__, filename, err))
        finally:
            file_queue.task_done()
def main(input_dir, width=None, thread_number=7):
    """
    Get all files in input directory (recursively), add them into a queue and
    launch the threads to create the visualizations.

    :param input_dir: the directory where to search for the executable files
    :param width: the desired width for the images
    :param thread_number: the desired number of used threads (at least 1)
    :return: None
    :raises ValueError: if thread_number is smaller than 1
    """
    if thread_number < 1:
        # With no worker, file_queue.join() below would wait forever.
        raise ValueError("thread_number must be at least 1")

    file_queue = Queue()
    for root, _directories, files in os.walk(input_dir):
        for filename in files:
            file_queue.put(os.path.join(root, filename))

    # Never start more workers than there are files to process.
    for _ in range(min(thread_number, file_queue.qsize())):
        thread = Thread(target=run, args=(file_queue, width))
        thread.daemon = True
        thread.start()

    file_queue.join()
if __name__ == '__main__':
    # Script entry point. The alternative invocations below are kept for
    # reference; only the timing run at the bottom is active.

    """ To use the tool with the command line """
    #parser = argparse.ArgumentParser(prog='binar2image.py', description="Convert binary file to image")
    #parser.add_argument(dest='input_dir', help='Input directory path is which include executable files')
    #args = parser.parse_args()
    #main(args.input_dir, width=None)

    """ To generate visualizations of a dataset for all families """
    #main("Malwares/Malware_Dataset")

    """ To generate visualizations of a family """
    #main("Malwares/Malware_Dataset/amonetize")

    """ To generate visualizations and measure time """
    print("Begin !")
    started_at = time.time()
    main("Time_test/L100_Cleanwares")
    elapsed_seconds = time.time() - started_at
    # Wall-clock duration of the whole conversion, in seconds.
    print(elapsed_seconds)
    print("End !")