wappalyze.py
import json
import re
import warnings

import pkg_resources
import requests
from bs4 import BeautifulSoup


def _parse_webpage(url):
    """Fetch a URL and collect the pieces of the page that the detection rules inspect."""
    webpage = {}
    response = requests.get(url)
    webpage['url'] = response.url
    webpage['headers'] = response.headers
    webpage['response'] = response.text
    webpage['html'] = BeautifulSoup(response.text, 'html.parser')
    # External script sources referenced by the page.
    webpage['scripts'] = [script['src'] for script in webpage['html'].find_all('script', src=True)]
    # <meta name="..." content="..."> tags, keyed by lower-cased name.
    webpage['metatags'] = {meta['name'].lower(): meta['content']
                           for meta in webpage['html'].find_all('meta', attrs=dict(name=True, content=True))}
    return webpage
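# A quick sketch of how _parse_webpage might be used on its own (the URL is a
# generic placeholder, not something this project ships or tests against):
#
#   page = _parse_webpage('https://example.com')
#   page['scripts']    # -> list of external <script src="..."> URLs
#   page['metatags']   # -> dict of <meta name="..."> values, keyed by lower-cased name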
def _prepare_app(app):
    """Normalise a raw app entry from apps.json so every field has a predictable type."""
    # List-valued fields: missing keys become empty lists, scalars become one-element lists.
    for key in ['url', 'html', 'script', 'implies']:
        try:
            value = app[key]
        except KeyError:
            app[key] = []
        else:
            if not isinstance(value, list):
                app[key] = [value]

    # Dict-valued fields: missing keys become empty dicts.
    for key in ['headers', 'meta']:
        if key not in app:
            app[key] = {}

    # A bare 'meta' string is shorthand for a pattern against the generator meta tag.
    obj = app['meta']
    if not isinstance(obj, dict):
        app['meta'] = {'generator': obj}

    # Header and meta names are matched case-insensitively, so lower-case the keys.
    for key in ['headers', 'meta']:
        app[key] = {k.lower(): v for k, v in app[key].items()}

    # Compile every pattern string into a regular expression object.
    for key in ['url', 'html', 'script']:
        app[key] = [_prepare_pattern(pattern) for pattern in app[key]]
    for key in ['headers', 'meta']:
        obj = app[key]
        for name, pattern in obj.items():
            obj[name] = _prepare_pattern(pattern)
def _prepare_pattern(pattern):
    """Compile a pattern string, ignoring any trailing '\\;' directives."""
    regex, _, _ = pattern.partition('\\;')
    try:
        return re.compile(regex, re.I)
    except re.error as e:
        warnings.warn(
            "Caught '{error}' compiling regex: {regex}"
            .format(error=e, regex=regex)
        )
        # regex that never matches:
        # http://stackoverflow.com/a/1845097/413622
        return re.compile(r'(?!x)x')
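# For context: Wappalyzer-style pattern strings may append directives such as
# version or confidence hints after a literal '\;' separator, roughly like
# this (illustrative pattern, not copied from this project's apps.json):
#
#   "PHP(?:/([\d.]+))?\;version:\1\;confidence:50"
#
# _prepare_pattern() keeps only the leading regex and discards everything
# after the first '\;'.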
def _has_app(app, webpage):
    """Return True if any of the app's patterns match the fetched webpage."""
    # URL patterns.
    for regex in app['url']:
        if regex.search(webpage['url']):
            return True
    # Response header patterns (requests' header dict is case-insensitive).
    for name, regex in app['headers'].items():
        if name in webpage['headers']:
            content = webpage['headers'][name]
            if regex.search(content):
                return True
    # Script src patterns.
    for regex in app['script']:
        for script in webpage['scripts']:
            if regex.search(script):
                return True
    # Meta tag patterns.
    for name, regex in app['meta'].items():
        if name in webpage['metatags']:
            content = webpage['metatags'][name]
            if regex.search(content):
                return True
    # Raw HTML patterns.
    for regex in app['html']:
        if regex.search(webpage['response']):
            return True
    return False
def _get_implied_apps(detected_apps, apps):
    """Expand a set of detected apps with everything they imply, transitively."""
    def __get_implied_apps(detected, apps):
        _implied_apps = set()
        for name in detected:
            try:
                _implied_apps.update(apps[name]['implies'])
            except KeyError:
                pass
        return _implied_apps

    # Follow 'implies' links until the set stops growing.
    implied_apps = __get_implied_apps(detected_apps, apps)
    all_implied_apps = set()
    while not all_implied_apps.issuperset(implied_apps):
        all_implied_apps.update(implied_apps)
        implied_apps = __get_implied_apps(all_implied_apps, apps)
    return all_implied_apps
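# Implication is transitive: if the fingerprint data says app A implies B and
# B implies C, then detecting A eventually pulls in both B and C. A minimal
# hypothetical shape of the data (names chosen for illustration only):
#
#   apps = {'A': {'implies': ['B']}, 'B': {'implies': ['C']}, 'C': {}}
#   _get_implied_apps({'A'}, apps)   # -> {'B', 'C'}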
def analyze():
    """Prompt for a URL and return detected technologies grouped by category."""
    url = input('Enter the URL: ')
    webpage = _parse_webpage(url)

    # Load the bundled fingerprint database.
    obj = json.loads(pkg_resources.resource_string(__name__, "data/apps.json"))
    apps = obj['apps']

    # Run every app's patterns against the fetched page.
    detected = []
    for app_name, app in apps.items():
        _prepare_app(app)
        if _has_app(app, webpage):
            detected.append(app_name)

    # Add apps implied by the detected ones.
    detected = set(detected).union(_get_implied_apps(detected, apps))

    # Map each detected app to a category name (the last listed category wins).
    category_wise = {}
    for app_name in detected:
        for cat in apps[app_name]['cats']:
            category_wise[app_name] = obj['categories'][str(cat)]['name']

    # Invert the mapping: category name -> list of app names.
    inv_map = {}
    for app_name, category in category_wise.items():
        inv_map.setdefault(category, []).append(app_name)
    return inv_map


if __name__ == '__main__':
    results = analyze()
    for category, frameworks in results.items():
        print('\nCategory: {}\nFrameworks: {}'.format(category, ','.join(frameworks)))
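# A rough sketch of a run as a script; the category and framework names shown
# are hypothetical and depend entirely on the bundled data/apps.json and the
# site being analysed:
#
#   $ python wappalyze.py
#   Enter the URL: https://example.com
#
#   Category: Web servers
#   Frameworks: Nginx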