-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdigit.py
136 lines (120 loc) · 5.26 KB
/
digit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
# DIGIT: clone a list of git repositories and extract endpoint-like strings
# (paths / URLs) from their files using the LinkFinder regex.
import re
import os
import sys
import argparse
from urllib import parse
from shutil import rmtree
from git import Repo
from colorama import Fore, init
# Temporary directory repos are cloned into; deleted again after each repo.
work_dir = '/tmp/digit_dir/'
init()  # colorama: make ANSI colour codes work on all platforms
def banner():
    """Print the coloured DIGIT ASCII-art banner to stdout."""
    print(f'''{Fore.GREEN}
________ .___ ________ .______________
\______ \ | | / _____/ | \__ ___/
 | | \ | | / \ ___ | | | |
 | ` \ | | \ \_\ \ | | | |
/_______ / |___| \______ / |___| |____|
 \/ \/
{Fore.BLUE}Made with {Fore.RED}♥ {Fore.BLUE}by Abdelrhman(@aufzayed){Fore.RESET}
''')
banner()

# CLI: --repo-list is a file with one repo URL per line; when omitted the
# URLs are read from stdin. --output picks the results directory (default:
# current working directory).
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--repo-list', metavar='', help='a file containing a list of repos to extract data from them')
arg_parser.add_argument('--output', metavar='', help='output directory')
args = arg_parser.parse_args()

if args.repo_list is not None:
    repo_list_abs_path = os.path.abspath(args.repo_list)
    with open(repo_list_abs_path) as repo_list:
        # split('\n')[0] drops the trailing newline from each line.
        urls_list = [url.split('\n')[0] for url in repo_list.readlines()]
else:
    # No file given: fall back to stdin. (The original third branch
    # `else: sys.exit()` was unreachable — `is not None` / `is None`
    # already cover every case.)
    urls_list = [url.split('\n')[0] for url in sys.stdin]

if args.output is not None:
    output_path = os.path.abspath(args.output)
else:
    output_path = os.path.abspath('.')
def digit(name, url, output):
    """Clone the git repo at *url* and extract endpoint-like strings from
    every file in the clone.

    Unique endpoints (path component only, leading '/' stripped, sorted)
    are appended to ``<output>/<name>_endpoints.txt``.

    :param name: repo name — used as the clone sub-directory and as the
                 output file stem
    :param url: git clone URL
    :param output: absolute path of the directory results are written into
    """
    endpoints = set()
    # Clone failures abort this repo only; the caller's loop continues.
    try:
        print(f'{Fore.BLUE}[+] {Fore.GREEN}Cloning {Fore.BLUE}{url}')
        Repo.clone_from(url, f'{work_dir}{name}')
        print(f'{Fore.BLUE}[+] {Fore.GREEN}repo Cloned')
    except Exception as e:
        print(f'{Fore.RED}[!] Error occurred while cloning {url}')
        print(e)
        return
    # regex by LinkFinder team https://github.com/GerbenJavado/LinkFinder
    endpoints_regex = r"""
    (?:"|') # Start newline delimiter
    (
    ((?:[a-zA-Z]{1,10}://|//) # Match a scheme [a-Z]*1-10 or //
    [^"'/]{1,}\. # Match a domain name (any character + dot)
    [a-zA-Z]{2,}[^"']{0,}) # The domain extension and/or path
    |
    ((?:/|\.\./|\./) # Start with /,../,./
    [^"'><,;| *()(%%$^/\\\[\]] # Next character can't be...
    [^"'><,;|()]{1,}) # Rest of the characters can't be
    |
    ([a-zA-Z0-9_\-/]{1,}/ # Relative endpoint with /
    [a-zA-Z0-9_\-/]{1,} # Resource name
    \.(?:[a-zA-Z]{1,4}|action) # Rest + extension (length 1-4 or action)
    (?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters
    |
    ([a-zA-Z0-9_\-/]{1,}/ # REST API (no extension) with /
    [a-zA-Z0-9_\-/]{3,} # Proper REST endpoints usually have 3+ chars
    (?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters
    |
    ([a-zA-Z0-9_\-]{1,} # filename
    \.(?:php|asp|aspx|jsp|json|
    action|html|js|txt|xml) # . + extension
    (?:[\?|#][^"|']{0,}|)) # ? or # mark with parameters
    )
    (?:"|') # End newline delimiter
    """
    # Compile once — the original recompiled the pattern for every scanned
    # line, which is pure loop-invariant overhead.
    endpoints_pattern = re.compile(endpoints_regex, re.VERBOSE)
    try:
        files_list = []
        for dir_path, _, file_names in os.walk(f'{work_dir}{name}'):
            for file_name in file_names:
                files_list.append(os.path.join(dir_path, file_name))
        print(f'{Fore.BLUE}[+] {Fore.GREEN}Extract Endpoints')
        for file_path in files_list:
            with open(file_path, 'r') as f:
                try:
                    for line in f:
                        # findall returns one tuple of groups per match;
                        # only one alternative group is non-empty.
                        for match in endpoints_pattern.findall(line):
                            for endpoint in match:
                                if endpoint == '':
                                    continue
                                # Keep only the path component, without a
                                # leading slash.
                                path = parse.urlparse(endpoint).path
                                if path.startswith('/'):
                                    path = path[1:]
                                # Skip empty paths — the original added ''
                                # for a bare '/', producing blank output
                                # lines.
                                if path:
                                    endpoints.add(path)
                except UnicodeDecodeError:
                    # Binary file — nothing to scan.
                    pass
        if endpoints:
            print(f'{Fore.BLUE}[+] {Fore.GREEN}saving results')
            print(f'{Fore.BLUE}[+] {Fore.GREEN}{len(endpoints)} Endpoints Found!')
            # BUG FIX: the original wrote to the module-level ``repo_name``
            # global instead of the ``name`` parameter, so calling digit()
            # with any other name wrote to the wrong file.
            with open(f'{output}/{name}_endpoints.txt', 'a') as results_file:
                for end in sorted(endpoints):
                    results_file.write(f'{end}\n')
        else:
            print(f'{Fore.RED}[!] No Endpoints Found.{Fore.RESET}')
    except Exception as e:
        # Best-effort per repo: report and return so the caller can move on.
        # (The original's single broad except mislabelled every failure here
        # as a clone error.)
        print(f'{Fore.RED}[!] Error occurred while processing {url}')
        print(e)
if __name__ == '__main__':
    for url in urls_list:
        # NOTE: kept module-level on purpose — digit() may reference it.
        # Last URL segment is used as the repo name (may include '.git').
        repo_name = url.split('/')[-1]
        # Fresh work dir per repo; makedirs(exist_ok=True) replaces the
        # original try/mkdir/except FileExistsError dance.
        os.makedirs(work_dir, exist_ok=True)
        digit(repo_name, url, output_path)
        print(f'{Fore.BLUE}[+] {Fore.GREEN}deleting temp files')
        # Delete the clone before the next repo is processed.
        rmtree(work_dir)