add webscraping + selenium scraping
ElNiak committed Jun 16, 2024
1 parent 3dc67fb commit 955f3a7
Showing 8 changed files with 191 additions and 55 deletions.
18 changes: 14 additions & 4 deletions bounty_drive/attacks/dorks/google_dorking.py
@@ -23,6 +23,7 @@
USER_AGENTS,
use_nordvpn,
)
from utils.nord_vpn_config import *
from utils.proxies import round_robin_proxies
from utils.request_manager import param_converter, start_request
from utils.results_manager import safe_add_result
@@ -43,7 +44,7 @@ def google_search_with_proxy(
advanced=False,
total_output=TOTAL_OUTPUT,
generated_dorks=True,
secured=True,
secured=False,
):
try:
query, extension, category = dork_tuple
@@ -54,11 +55,15 @@
base_url = "https://www.google.com/search"
headers = {
"User-Agent": random.choice(USER_AGENTS),
"Connection": "close",
}

if "socks5" in proxy:
if "username:password" in proxy:
nord_vpn_user_pass = random.choice(nord_vpn_login)
proxy = proxy.replace("username", nord_vpn_user_pass[0]).replace(
"password", nord_vpn_user_pass[1]
)
proxies = {"https": proxy}
secured = True
else:
proxies = {"http": proxy, "https": proxy}

@@ -284,7 +289,12 @@ def load_google_dorks_and_search(extensions=None, proxies=None):
}
for future in tqdm(
concurrent.futures.as_completed(future_to_search),
total=len(future_to_search),
total=len(future_to_search)
* (
1
if len(dorking_config.SUBDOMAIN) == 0
else len(dorking_config.SUBDOMAIN)
),
desc="Searching for vulnerable website",
unit="site",
# leave=True,
39 changes: 34 additions & 5 deletions bounty_drive/attacks/xss/xss.py
@@ -17,6 +17,7 @@
from utils.waf_mitigation import waf_detector
from utils.app_config import POTENTIAL_PATHS, USER_AGENTS, VULN_PATHS
from utils.request_manager import inject_params, inject_payload
from utils.nord_vpn_config import *

try:
from selenium import webdriver
@@ -101,10 +102,14 @@ def generate_xss_urls(url):
starting_strings = ["", "crlfsuite", "?crlfsuite=", "#", "__session_start__/"]

if is_param:
cprint(f"URL parameters: {url}", color="yellow", file=sys.stderr)
del starting_strings[2]
for string in starting_strings:
for each_escape in escape_chars:
injected_urls = inject_params(url, string + each_escape + injection)
cprint(
f"Injected URLs: {injected_urls}", color="yellow", file=sys.stderr
)
for each_injected_url in injected_urls:
parsed_urls.add(each_injected_url)

@@ -113,6 +118,7 @@
for injected in _injected:
parsed_urls.add(injected)
else:
cprint(f"URL non parameters: {url}", color="yellow", file=sys.stderr)
if not url.endswith("/"):
url = url + "/"
else:
@@ -121,6 +127,7 @@
for each_escape in escape_chars:
parsed_urls.add(url + string + each_escape + injection)
for payload in xss_payloads:
cprint(f"Payload: {payload}", color="yellow", file=sys.stderr)
parsed_urls.add(url + payload)
total_len = len(parsed_urls)
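
For context, a hypothetical sketch of the kind of per-parameter injection that inject_params is used for above; inject_params itself is not shown in this commit, so the helper below is an illustration rather than the project's implementation:

from urllib.parse import urlsplit, urlunsplit, parse_qsl

def inject_each_param(url, payload):
    # Return one URL per query parameter, with that parameter's value replaced by the payload.
    parts = urlsplit(url)
    params = parse_qsl(parts.query, keep_blank_values=True)
    injected = []
    for i, (_name, _value) in enumerate(params):
        mutated = [(n, payload) if j == i else (n, v) for j, (n, v) in enumerate(params)]
        query = "&".join(f"{n}={v}" for n, v in mutated)  # left unencoded on purpose for fuzzing
        injected.append(urlunsplit((parts.scheme, parts.netloc, parts.path, query, parts.fragment)))
    return injected

print(inject_each_param("https://example.com/search?q=test&page=1", "'\"><svg onload=confirm(1)>"))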

@@ -129,8 +136,11 @@

def test_xss_target(url, proxy):
total_parsed_targets = []
cprint("Intializing Payload Generator...", color="yellow", file=sys.stderr)
cprint(
f"Intializing Payload Generator for url {url}", color="yellow", file=sys.stderr
)
parsed_target = generate_xss_urls(url)
cprint(f"Generated {parsed_target[1]} payloads", color="yellow", file=sys.stderr)
for each in parsed_target[0]:
total_parsed_targets.append(each)

@@ -160,7 +170,6 @@ def test_vulnerability_xss(proxies):
# driver = webdriver.Chrome(service=s)

new_urls = []
headers = {"User-Agent": random.choice(USER_AGENTS)}

lock = threading.Lock()

@@ -176,22 +185,41 @@
) as executor:
future_to_search = {
executor.submit(
scrape_links_from_url, task["website"], task["proxy"], headers
scrape_links_from_url, task["website"], task["proxy"]
): task
for task in search_tasks_with_proxy
}
for website in tqdm(
concurrent.futures.as_completed(future_to_search),
desc=f"Upating links DB for {website}",
desc=f"Upating links DB for xss website",
unit="site",
total=len(future_to_search),
):
with lock:
new_urls += website.result()

cprint(f"Found {len(new_urls)} new links", color="green", file=sys.stderr)

# crawl the website for more links TODO

POTENTIAL_PATHS["xss"][1] += new_urls

POTENTIAL_PATHS["xss"][1] = list(set(POTENTIAL_PATHS["xss"][1]))

cprint(
f"Total links: {len(POTENTIAL_PATHS['xss'][1])}",
color="green",
file=sys.stderr,
)

with open(POTENTIAL_PATHS["xss"][0], "r") as file: # Open file in append mode
writen_urls = file.readlines()

with open(POTENTIAL_PATHS["xss"][0], "a") as file:
for url in POTENTIAL_PATHS["xss"][1]:
if url not in writen_urls:
file.write(url + "\n")

# Now, append a proxy to each task
number_of_worker = len(proxies)
search_tasks_with_proxy = []
@@ -208,8 +236,9 @@
}
for website in tqdm(
concurrent.futures.as_completed(future_to_search),
desc=f"Testing for XSS for {website}",
desc=f"Testing for XSS",
unit="site",
total=len(future_to_search),
):
result, payload_url = website.result()
if result:
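
A condensed sketch (not the project's code) of the link-persistence step added above; it assumes POTENTIAL_PATHS["xss"] holds a (filepath, url_list) pair and strips the lines read back from disk so the membership check compares bare URLs rather than newline-terminated ones:

def persist_new_links(filepath, urls):
    # Read what is already on disk, then append only URLs we have not written before.
    try:
        with open(filepath, "r") as fh:
            written = {line.strip() for line in fh}
    except FileNotFoundError:
        written = set()
    added = 0
    with open(filepath, "a") as fh:
        for url in set(urls):
            if url and url not in written:
                fh.write(url + "\n")
                written.add(url)
                added += 1
    return added

# usage sketch: persist_new_links(POTENTIAL_PATHS["xss"][0], POTENTIAL_PATHS["xss"][1])
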
70 changes: 43 additions & 27 deletions bounty_drive/bounty_drive.py
@@ -16,6 +16,8 @@

from utils.proxies import setup_proxies
from utils.app_config import *
from utils.nord_vpn_config import *


from attacks.xss.xss import test_vulnerability_xss
from attacks.xss.xss_config import *
@@ -196,24 +198,6 @@ def get_user_input():
)
or "true"
)
total_output = (
input(
colored(
f"Please specify the total no. of websites you want [default: {DEFAULT_TOTAL_OUTPUT}] \n----> ",
"cyan",
)
)
or DEFAULT_TOTAL_OUTPUT
)
page_no = (
input(
colored(
f"From which Google page you want to start(eg- 1,2,3) [default: {DEFAULT_PAGE_NO}] \n----> ",
"cyan",
)
)
or DEFAULT_PAGE_NO
)

do_dorking_google = (
input(
@@ -224,6 +208,32 @@
)
or "true"
)
do_dorking_google = True if do_dorking_google.lower() == "true" else False
total_output = DEFAULT_TOTAL_OUTPUT
page_no = DEFAULT_PAGE_NO
if do_dorking_google:
total_output = (
input(
colored(
f"Please specify the total no. of websites you want [default: {DEFAULT_TOTAL_OUTPUT}] \n----> ",
"cyan",
)
)
or DEFAULT_TOTAL_OUTPUT
)
page_no = (
input(
colored(
f"From which Google page you want to start(eg- 1,2,3) [default: {DEFAULT_PAGE_NO}] \n----> ",
"cyan",
)
)
or DEFAULT_PAGE_NO
)
# Ensure numeric inputs are correctly converted to integers
TOTAL_OUTPUT = int(total_output)
PAGE_NO = int(page_no)

do_dorking_github = (
input(
colored(
@@ -289,10 +299,6 @@ def get_user_input():
or "false"
)

# Ensure numeric inputs are correctly converted to integers
TOTAL_OUTPUT = int(total_output)
PAGE_NO = int(page_no)
do_dorking_google = True if do_dorking_google.lower() == "true" else False
do_dorking_github = True if do_dorking_github.lower() == "true" else False
subdomain = True if subdomain.lower() == "true" else False
use_proxy = True if use_proxy.lower() == "true" else False
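
The prompts above all follow the same ask-with-default-then-normalise pattern; a small illustrative helper (the function name is ours, not the commit's):

from termcolor import colored

def ask_bool(question, default=True):
    # Ask once, fall back to the default, and normalise "true"/"false" to a bool.
    default_str = "true" if default else "false"
    answer = input(colored(f"{question} [default: {default_str}] \n----> ", "cyan")) or default_str
    return answer.strip().lower() == "true"

# e.g. do_dorking_google = ask_bool("Do you want to do the Google dorking scan phase?")
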
@@ -378,25 +384,35 @@ def get_user_input():
# proxies = setup_proxies()
try:
# Read NordVPN logins csv
if os.path.exists("proxies/nordvpn_login.csv"):
if os.path.exists("proxies/nordvpn_login.csv") and use_vpn:
with open("proxies/nordvpn_login.csv", "r") as file:
nordvpn = list(csv.reader(file))
username = nordvpn[1][0]
password = nordvpn[1][1]
for i in range(1, len(nordvpn)):
nord_vpn_login.append([nordvpn[i][0], nordvpn[i][1]])

use_nordvpn = True
cprint(
f"You have NordVPN account using these proxies {username} {password}",
f"You have NordVPN account using these proxies {nord_vpn_login}",
"green",
file=sys.stderr,
)
# https://stackoverflow.com/questions/64516109/how-to-use-nordvpn-servers-as-proxy-for-python-requests
# TODO: curl -s https://nordvpn.com/api/server | jq -r ".[] | select(.features.socks==true) | [.domain, .name] | @tsv"
with open("proxies/nordvpn-proxy-list.txt", "r") as file:
proxies = []
for line in file.readlines():
cprint(f"Proxy: {line}", "green", file=sys.stderr)
line = line.replace("\n", "")
# socks5h enable hostname resolution
p = "socks5h://" + username + ":" + password + "@" + line
p = (
"socks5h://"
+ "username"
+ ":"
+ "password"
+ "@"
+ line
+ ":1080"
)
proxies.append(p)
cprint(f"Proxy: {p}", "green", file=sys.stderr)
else:
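
Putting the two file reads above together, a trimmed sketch of the proxy bootstrap (the paths and the :1080 SOCKS port mirror the diff; the literal username:password placeholders are kept so google_search_with_proxy can substitute a random login per request):

import csv

nord_vpn_login = []
with open("proxies/nordvpn_login.csv", "r") as fh:
    rows = list(csv.reader(fh))
for row in rows[1:]:  # skip the CSV header row
    nord_vpn_login.append([row[0], row[1]])

proxies = []
with open("proxies/nordvpn-proxy-list.txt", "r") as fh:
    for line in fh:
        host = line.strip()
        if host:
            # socks5h:// so DNS resolution also goes through the proxy
            proxies.append("socks5h://username:password@" + host + ":1080")
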
2 changes: 1 addition & 1 deletion bounty_drive/utils/app_config.py
@@ -114,5 +114,5 @@
PROXY_FACTOR = 1


RATE_PER_MINUTE = 5
RATE_PER_MINUTE = 1
CURRENT_DELAY = 60 / RATE_PER_MINUTE # TODO add backoff timer
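
The net effect of this change, sketched below as an assumption rather than a quote from the repo: roughly one Google query per minute, with the extra jitter coming from the request_manager change further down:

import random
import time

RATE_PER_MINUTE = 1
CURRENT_DELAY = 60 / RATE_PER_MINUTE  # 60 s base delay between Google queries

def throttle():
    # Sleep around the base delay with a little jitter so requests are not perfectly periodic.
    time.sleep(random.uniform(CURRENT_DELAY - 5, CURRENT_DELAY + 5))
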
1 change: 1 addition & 0 deletions bounty_drive/utils/nord_vpn_config.py
@@ -0,0 +1 @@
nord_vpn_login = []
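
The new module is a single shared list that the other files pull in with from utils.nord_vpn_config import *; a stand-alone illustration (module names are stand-ins) of why in-place appends, as done in bounty_drive.py, are visible to every importer while rebinding the name would not be:

import types

# Stand-in for utils/nord_vpn_config.py
nord_vpn_config = types.ModuleType("nord_vpn_config")
nord_vpn_config.nord_vpn_login = []

# Stand-in for `from utils.nord_vpn_config import *` in another module:
nord_vpn_login = nord_vpn_config.nord_vpn_login

nord_vpn_config.nord_vpn_login.append(["user", "pass"])  # mutate the shared list in place
print(nord_vpn_login)  # [['user', 'pass']]; both names reference the same list object
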
13 changes: 10 additions & 3 deletions bounty_drive/utils/request_manager.py
@@ -178,7 +178,13 @@ def start_request(
)
time.sleep(WAF_DELAY)
else:
if scrap_urls:
if "did not match any documents" in response.text:
cprint(
f"No results found for {full_query} with proxy {proxies}",
"yellow",
file=sys.stderr,
)
elif scrap_urls:
urls = []
soup = BeautifulSoup(response.text, "html.parser")
result_block = soup.find_all("div", attrs={"class": "g"})
@@ -189,7 +195,7 @@
)
if len(result_block) == 0:
cprint(
f"No results found for {full_query} with proxy {proxies}\nTrying new parsing method",
f"No results found for {full_query} with proxy {proxies}\n{response.text}\nTrying new parsing method",
"yellow",
file=sys.stderr,
)
@@ -203,6 +209,7 @@
url_match = re.search(r"(https?://[^&]+)", href)
if url_match:
url = url_match.group(0)
print(url)
# Extract the title (text within <div> with specific class)
title_tag = a_tag.find("h3") or a_tag.find(
"div", class_="BNeawe vvjwJb AP7Wnd UwRFLe"
@@ -248,7 +255,7 @@
)

# Placeholder for URL extraction logic
delay = random.uniform(CURRENT_DELAY - 2, CURRENT_DELAY + 2)
delay = random.uniform(CURRENT_DELAY - 5, CURRENT_DELAY + 5)
time.sleep(delay) # Wait before retrying
return urls # Return the category and a placeholder result
except requests.exceptions.ProxyError as e:
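
A hedged sketch of the parsing path above: return early on Google's "did not match any documents" page, prefer the div.g result blocks, and fall back to scanning every anchor for an embedded absolute URL. The CSS class and the fallback regex mirror the diff but are assumptions about Google's markup, which changes frequently:

import re
from bs4 import BeautifulSoup

def extract_result_urls(html):
    if "did not match any documents" in html:
        return []  # genuine empty result page, not a parsing failure
    soup = BeautifulSoup(html, "html.parser")
    urls = []
    for block in soup.find_all("div", attrs={"class": "g"}):
        a_tag = block.find("a", href=True)
        if a_tag:
            urls.append(a_tag["href"])
    if not urls:
        # Fallback parsing method: scan every anchor for an embedded http(s) URL
        for a_tag in soup.find_all("a", href=True):
            match = re.search(r"(https?://[^&]+)", a_tag["href"])
            if match:
                urls.append(match.group(0))
    return urls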