-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathlog_parser.py
93 lines (90 loc) · 3.02 KB
/
log_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
from xml.etree import ElementTree as ET
from urllib.parse import unquote,unquote_plus
import base64
import csv
# Input file consumed by parse_log() below: an XML export (Burp-style <item>
# entries) of known-bad HTTP requests; request bodies are base64-encoded.
log_path='bad_requests.log'
def parse_log(log_path):
    """Parse a Burp-style XML log and map each request to its response.

    The file is expected to contain ``<item>`` elements, each with a
    ``<request>`` and ``<response>`` child. Request text is percent-decoded
    before being used as the dictionary key.

    Args:
        log_path: Path to the XML log file.

    Returns:
        dict mapping percent-decoded request text -> raw response text.

    Exits the process (via ``exit()``) when the file is missing or is not
    well-formed XML, preserving the original script's behavior.
    """
    result = {}
    try:
        # Let ET.parse open the file itself: avoids the racy
        # "open to check existence, then parse" pattern.
        tree = ET.parse(log_path)
    except IOError:
        print("Error ", log_path, "doesn't exist")
        exit()
    except ET.ParseError:
        # Malformed XML is typically caused by raw binary in the log.
        print("Please make sure binary data is not present in log")
        exit()
    root = tree.getroot()
    for item in root.findall('item'):
        raw_req = unquote(item.find('request').text)
        result[raw_req] = item.find('response').text
    return result
def parseRawHTTPReq(rawreq):
    """Split a raw HTTP request into its headers, method, body and path.

    Args:
        rawreq: The raw request as ``bytes`` (UTF-8) or ``str``.

    Returns:
        Tuple ``(headers, method, body, path)`` where ``headers`` is a dict
        of header name -> value, ``method``/``path`` come from the request
        line, and ``body`` is everything after the blank line ("" if none).
    """
    try:
        raw = rawreq.decode('utf8')
    except (AttributeError, UnicodeDecodeError):
        # Already a str, or not valid UTF-8: work with it as given.
        raw = rawreq
    headers = {}
    # BUG FIX: the original indexed sp[1] unconditionally and crashed with
    # IndexError on requests that have no blank-line separator.
    head, sep, body = raw.partition('\r\n\r\n')
    if not sep:
        body = ""
    # BUG FIX: split header lines on '\r\n', not '\n' — splitting on '\n'
    # left a trailing '\r' on every header value.
    lines = head.split('\r\n')
    request_line = lines[0].split(' ', 2)
    method = request_line[0]
    path = request_line[1] if len(request_line) > 1 else ""
    for line in lines[1:]:
        name, colon, value = line.partition(': ')
        # Skip malformed lines (no "name: value" shape), as the original's
        # try/except pass effectively did.
        if name and colon:
            headers[name] = value
    return headers, method, body, path
# Tokens commonly found in SQL-injection payloads; counted as a feature.
badwords = ['sleep','drop','uid','uname','select','waitfor','delay','system','union','order by','group by']
# Class label attached to every row this script emits (this log is all-bad).
category = "bad"


def ExtractFeatures(method, path_enc, body_enc, headers):
    """Compute the feature vector for one HTTP request.

    Args:
        method: HTTP method string (e.g. "GET").
        path_enc: Percent-encoded request path (str).
        body_enc: Percent-encoded request body (str, may be "").
        headers: dict of header name -> value.

    Returns:
        List ``[method, path, body, single_q, double_q, dashes, braces,
        spaces, badwords_count, category]`` where path/body are UTF-8 bytes
        and the counts are computed on the decoded path and body.
    """
    badwords_count = 0
    path = unquote_plus(path_enc)
    body = unquote(body_enc)
    single_q = path.count("'") + body.count("'")
    double_q = path.count("\"") + body.count("\"")
    dashes = path.count("--") + body.count("--")
    braces = path.count("(") + body.count("(")
    spaces = path.count(" ") + body.count(" ")
    for word in badwords:
        badwords_count += path.count(word) + body.count(word)
        # BUG FIX: this loop was previously *outside* the badwords loop, so
        # headers were only scanned for the leftover last word ('group by').
        for value in headers.values():
            badwords_count += value.count(word)
    return [method, path_enc.encode('utf-8').strip(), body_enc.encode('utf-8').strip(),
            single_q, double_q, dashes, braces, spaces, badwords_count, category]
# ---- Script entry point: turn the parsed log into a labelled feature CSV ----
# BUG FIX: raw_input() is Python 2 only and raises NameError on Python 3
# (the rest of this file is Python 3 — it imports urllib.parse).
input('>>>')

result = parse_log(log_path)

# One open in 'w' mode replaces the original write-header/close/reopen-append
# sequence; newline='' is required by the csv module to avoid blank rows on
# Windows, and the with-block guarantees the file is closed.
with open('demo_bad_responses.csv', "w", newline='') as f:
    c = csv.writer(f)
    c.writerow(["method","path","body","single_q","double_q","dashes","braces","spaces","badwords","class"])
    for items in result:
        # Keys of `result` are base64-encoded raw requests.
        raw = base64.b64decode(items)
        headers, method, body, path = parseRawHTTPReq(raw)
        c.writerow(ExtractFeatures(method, path, body, headers))