-
Notifications
You must be signed in to change notification settings - Fork 2
/
asm_parser.py
81 lines (70 loc) · 2.48 KB
/
asm_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
from pyparsing import Word, hexnums, WordEnd, Optional, alphas, alphanums
from multiprocessing import Pool
import pandas as pd
import re
import os
import sys
class asm_cruncher:
    """Count DLL names, section prefixes and opcodes in disassembly listings.

    Every ``*_finder`` method reads one text file (decoded as ISO-8859-1) and
    returns a flat dict: the key ``'file'`` holds the filename that was passed
    in, and every other key maps a token found in the file to the number of
    lines on which it was counted.
    """

    def dll_finder(self, filename):
        """Count the first ``*.dll`` name found on each line of *filename*.

        Matching is case-insensitive and names are lower-cased before they
        are counted. Only the first match on a line is counted; lines with
        no match are skipped.

        Returns:
            dict: ``{'file': filename, <dll name>: <line count>, ...}``.
        """
        dll_list = {'file': filename}
        # Raw string: '\.' in a plain literal is an invalid escape sequence.
        rex = re.compile(r'[A-Za-z0-9]*\.dll', re.I)
        with open(filename, "r", encoding="ISO-8859-1") as source:
            for line in source:
                match = rex.search(line)
                if match is None:
                    continue
                name = match.group(0).strip().lower()
                dll_list[name] = dll_list.get(name, 0) + 1
        return dll_list

    def section_finder(self, filename):
        """Count lines per section prefix in *filename*.

        A line is considered when it starts with ``.`` or ``H``; its prefix
        is the lower-cased text before the first ``:``. Lines without a
        ``:`` carry no prefix and are skipped.

        Returns:
            dict: ``{'file': filename, <prefix>: <line count>, ...}``.
        """
        sec_list = {'file': filename}
        with open(filename, "r", encoding="ISO-8859-1") as source:
            for line in source:
                if not line.startswith((".", "H")):
                    continue
                prefix, colon, _ = line.partition(":")
                if not colon:
                    # No ':' on the line, so there is no prefix to extract.
                    continue
                name = prefix.lower()
                sec_list[name] = sec_list.get(name, 0) + 1
        return sec_list

    def opcode_finder(self, filename):
        """Count opcode mnemonics on the ``.``-prefixed lines of *filename*.

        Each considered line is parsed as: the text up to and including the
        first ``:``, a hex number, then optionally a run of hex numbers
        followed by an alphabetic word that is taken as the opcode. Opcodes
        that begin with two upper-case hex characters are ignored.

        Returns:
            dict: ``{'file': filename, <opcode>: <line count>, ...}``.

        Raises:
            pyparsing.ParseException: propagated unchanged when a
                ``.``-prefixed line does not match the grammar.
        """
        opcode_list = {'file': filename}
        # WordEnd avoids parsing the leading a-f of a non-hex word as hex.
        hex_integer = Word(hexnums) + WordEnd()
        line_parser = hex_integer + Optional(
            (hex_integer * (1,))("instructions")
            + Word(alphas, alphanums)("opcode")
        )
        hex_pair = re.compile(r'[A-F0-9][A-F0-9]')
        with open(filename, "r", encoding="ISO-8859-1") as source:
            for source_line in source:
                if not source_line.startswith('.'):
                    continue
                # The prefix differs per line, so it is matched as a literal
                # placed in front of the shared grammar.
                prefix = source_line[:source_line.find(":") + 1]
                result = (prefix + line_parser).parseString(source_line)
                if "opcode" not in result:
                    continue
                opcode = result.opcode
                if hex_pair.match(opcode):
                    continue
                opcode_list[opcode] = opcode_list.get(opcode, 0) + 1
        return opcode_list
if __name__ == '__main__':
    # Usage: asm_parser.py [PROCESSES]
    # Reads every *.asm file in ./Dataset/asm and writes one CSV per finder
    # into ./results, with one row per input file.
    # With no argument, Pool(None) picks the worker count itself.
    proc = int(sys.argv[1]) if len(sys.argv) > 1 else None
    obj = asm_cruncher()
    res_list = {
        'dll_data.csv': obj.dll_finder,
        'section_data.csv': obj.section_finder,
        'opcode_data.csv': obj.opcode_finder,
    }
    o_path = os.getcwd()
    asm_dir = os.path.join(o_path, "Dataset", "asm")
    results_dir = os.path.join(o_path, "results")
    # Create the output directory up front so a long run cannot fail at the
    # final write just because it is missing.
    os.makedirs(results_dir, exist_ok=True)
    pd.options.display.float_format = '{:.0f}'.format

    # The finders receive bare filenames (which also end up in the 'file'
    # column), so the work runs from inside the dataset directory.
    os.chdir(asm_dir)
    try:
        filelist = [x for x in os.listdir() if x.endswith(".asm")]
        if not filelist:
            sys.exit("no .asm files found in " + asm_dir)
        for item, finder in res_list.items():
            with Pool(proc) as p:
                df = p.map(finder, filelist)
            frame = pd.DataFrame(df).set_index('file')
            print(frame, "\n")
            frame.to_csv(
                os.path.join(results_dir, item),
                sep=',',
                encoding='utf-8',
                index=True,
                float_format="%.0f",
            )
    finally:
        # Restore the original working directory even if a finder raised.
        os.chdir(o_path)