-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathload_ffri2018_D.py
77 lines (65 loc) · 3.09 KB
/
load_ffri2018_D.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import re, pandas as pd
from pathlib import Path
# folder_list : ['<Path>/cleanware', '<Path>/malware']
# api_list_area : '555*.txt', To limit dimensions.
def make_api_list(folder_list, api_list_area):
    """Collect the distinct API names listed in the matching text files.

    Args:
        folder_list: Folders to scan, e.g. ['<Path>/cleanware', '<Path>/malware'].
        api_list_area: Glob pattern selecting files inside each folder,
            e.g. '555*.txt' (used to limit the number of API columns).

    Returns:
        A list that starts with 'label' followed by every API name in order
        of first appearance (folders in the given order, files of a folder
        in numeric order of their file names).
    """
    def numeric_key(path):
        # Look at the file name only: digits in a parent directory would
        # otherwise be picked for every file and defeat the numeric ordering.
        found = re.search(r"[0-9]+", path.name)
        if found is None:
            # Files without a number are placed after the numbered ones.
            return (1, 0, str(path))
        return (0, int(found.group()), str(path))

    api_list = ['label']
    seen = set(api_list)  # O(1) duplicate check; the list keeps the order
    for folder in folder_list:
        for path in sorted(Path(folder).glob(api_list_area), key=numeric_key):
            with open(path) as f:
                for line in f:
                    # e.g.) KERNEL32.dll.GetTickCount Hint[469]
                    if 'Hint' not in line or '.' not in line:
                        continue
                    # Skip lines containing '@', '?', '$' or '*'.
                    if any(ch in line for ch in '@?$*'):
                        continue
                    # 'Hint' is present, so the line has at least one token.
                    api = line.split()[0]
                    if api not in seen:
                        seen.add(api)
                        api_list.append(api)
    return api_list
# file : Target malware
# api_list : label + some APIs
def make_used_api_dataframe_with_malware_file(file, api_list):
    """Build a one-row DataFrame of the APIs listed in a single target file.

    Args:
        file: Path of the target (malware) text file.
        api_list: Column names, normally 'label' followed by API names.

    Returns:
        A pandas DataFrame with exactly one row. Every name in ``api_list``
        is a column holding 1 if the API appears in ``file`` and 0 otherwise;
        APIs found in ``file`` that are not in ``api_list`` are appended as
        extra columns holding 1. The 'label' column is always 1.
    """
    used_api_dict = {api: [0] for api in api_list}
    with open(file) as f:
        for line in f:
            # e.g.) KERNEL32.dll.GetTickCount Hint[469]
            if 'Hint' not in line or '.' not in line:
                continue
            # Skip lines containing '@', '?', '$' or '*'.
            if any(ch in line for ch in '@?$*'):
                continue
            # 'Hint' is present, so the line has at least one token.
            api = line.split()[0]
            # Known APIs are flagged; unknown ones become new columns.
            used_api_dict[api] = [1]
    # Assigning (rather than indexing) keeps the column position when
    # 'label' already exists and avoids a KeyError when it does not.
    used_api_dict['label'] = [1]
    return pd.DataFrame.from_dict(used_api_dict)
# folder_list : ['<Path>/cleanware']
# data_area_list : ['555*.txt'], glob patterns to limit which files are loaded
# api_list : label + some APIs + Target malware columns
def make_used_api_dataframe(folder_list, data_area_list, api_list):
    """Build a DataFrame with one row of API-usage flags per matching file.

    Args:
        folder_list: Folders to scan, e.g. ['<Path>/cleanware'].
        data_area_list: Glob patterns selecting files inside each folder,
            e.g. ['555*.txt'].
        api_list: Column names, normally 'label' followed by API names.

    Returns:
        A pandas DataFrame whose columns are the names in ``api_list`` and
        whose rows follow folder order, then pattern order, then numeric
        order of the file names. A cell is 1 when that API is listed in the
        file and 0 otherwise; APIs not in ``api_list`` are ignored.
    """
    def numeric_key(path):
        # Look at the file name only: digits in a parent directory would
        # otherwise be picked for every file and defeat the numeric ordering.
        found = re.search(r"[0-9]+", path.name)
        if found is None:
            # Files without a number are placed after the numbered ones.
            return (1, 0, str(path))
        return (0, int(found.group()), str(path))

    used_api_dict = {api: [] for api in api_list}
    for folder in folder_list:
        for data_area in data_area_list:
            for path in sorted(Path(folder).glob(data_area), key=numeric_key):
                # Start a new all-zero row. Iterating the dict (not api_list)
                # keeps every column the same length even when api_list
                # contains a name more than once.
                for column in used_api_dict.values():
                    column.append(0)
                with open(path) as f:
                    for line in f:
                        # e.g.) KERNEL32.dll.GetTickCount Hint[469]
                        if 'Hint' not in line or '.' not in line:
                            continue
                        # Skip lines containing '@', '?', '$' or '*'.
                        if any(ch in line for ch in '@?$*'):
                            continue
                        # 'Hint' is present, so the line has a first token.
                        api = line.split()[0]
                        if api in used_api_dict:
                            # The last element is the row of the current file.
                            used_api_dict[api][-1] = 1
    return pd.DataFrame.from_dict(used_api_dict)