Commit ac5ac64
merge dev
TheTechromancer committed Feb 16, 2024
2 parents 30eb11d + 25e5390 commit ac5ac64
Showing 8 changed files with 107 additions and 14 deletions.
65 changes: 65 additions & 0 deletions bbot/core/helpers/web.py
@@ -11,6 +11,7 @@
from contextlib import asynccontextmanager

from httpx._models import Cookies
+from socksio.exceptions import SOCKSError

from bbot.core.errors import WordlistError, CurlError
from bbot.core.helpers.ratelimiter import RateLimiter
@@ -566,6 +567,64 @@ def is_spider_danger(self, source_event, url):
return True
return False

+def beautifulsoup(
+    self,
+    markup,
+    features="html.parser",
+    builder=None,
+    parse_only=None,
+    from_encoding=None,
+    exclude_encodings=None,
+    element_classes=None,
+    **kwargs,
+):
"""
Naviate, Search, Modify, Parse, or PrettyPrint HTML Content.
More information at https://beautiful-soup-4.readthedocs.io/en/latest/
Args:
markup: A string or a file-like object representing markup to be parsed.
features: Desirable features of the parser to be used.
This may be the name of a specific parser ("lxml",
"lxml-xml", "html.parser", or "html5lib") or it may be
the type of markup to be used ("html", "html5", "xml").
Defaults to 'html.parser'.
builder: A TreeBuilder subclass to instantiate (or instance to use)
instead of looking one up based on `features`.
parse_only: A SoupStrainer. Only parts of the document
matching the SoupStrainer will be considered.
from_encoding: A string indicating the encoding of the
document to be parsed.
exclude_encodings = A list of strings indicating
encodings known to be wrong.
element_classes = A dictionary mapping BeautifulSoup
classes like Tag and NavigableString, to other classes you'd
like to be instantiated instead as the parse tree is
built.
**kwargs = For backwards compatibility purposes.
Returns:
soup: An instance of the BeautifulSoup class
Todo:
- Write tests for this function
Examples:
>>> soup = self.helpers.beautifulsoup(event.data["body"], "html.parser")
Perform an html parse of the 'markup' argument and return a soup instance
>>> email_type = soup.find(type="email")
Searches the soup instance for all occurances of the passed in argument
"""
+    try:
+        soup = BeautifulSoup(
+            markup, features, builder, parse_only, from_encoding, exclude_encodings, element_classes, **kwargs
+        )
+        return soup
+    except Exception as e:
+        log.debug(f"Error parsing beautifulsoup: {e}")
+        return False

def ssl_context_noverify(self):
if self._ssl_context_noverify is None:
ssl_context = ssl.create_default_context()
@@ -616,6 +675,12 @@ async def _acatch(self, url, raise_error):
log.trace(traceback.format_exc())
if raise_error:
raise httpx.RequestError(msg)
+except SOCKSError as e:
+    msg = f"SOCKS error with request to URL: {url}: {e}"
+    log.trace(msg)
+    log.trace(traceback.format_exc())
+    if raise_error:
+        raise httpx.RequestError(msg)
except BaseException as e:
# don't log if the error is the result of an intentional cancellation
if not any(
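The new helper centralizes HTML parsing behind `self.helpers` and returns `False` on a parse failure instead of raising, so call sites can guard with a simple truthiness check. A minimal sketch of that pattern from inside a module's `handle_event`, mirroring the call sites updated below:

```python
# Sketch of calling the new helper from a BBOT module (mirrors newsletters.py below).
# self.helpers.beautifulsoup returns False on a parse error rather than raising.
async def handle_event(self, event):
    soup = self.helpers.beautifulsoup(event.data["body"], "html.parser")
    if not soup:
        return  # markup could not be parsed; nothing to search
    email_field = soup.find(type="email")  # first matching element, or None
```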
5 changes: 2 additions & 3 deletions bbot/modules/dnsdumpster.py
@@ -1,5 +1,4 @@
import re
-from bs4 import BeautifulSoup

from bbot.modules.templates.subdomain_enum import subdomain_enum

@@ -25,7 +24,7 @@ async def query(self, domain):
return ret
else:
self.debug(f'Valid response code "{status_code}" from DNSDumpster')
-html = BeautifulSoup(res1.content, "html.parser")
+html = self.helpers.beautifulsoup(res1.content, "html.parser")
csrftoken = None
csrfmiddlewaretoken = None
try:
@@ -73,7 +72,7 @@ async def query(self, domain):
self.verbose(f'Bad response code "{status_code}" from DNSDumpster')
return ret

-html = BeautifulSoup(res2.content, "html.parser")
+html = self.helpers.beautifulsoup(res2.content, "html.parser")
escaped_domain = re.escape(domain)
match_pattern = re.compile(r"^[\w\.-]+\." + escaped_domain + r"$")
for subdomain in html.findAll(text=match_pattern):
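For reference, `re.escape` keeps the dots in the target domain literal, and the anchored pattern only admits strings ending in `.` plus the domain. A runnable sketch of the extraction above, with illustrative markup in place of the DNSDumpster response:

```python
# Standalone sketch of the subdomain extraction used in dnsdumpster.py.
import re
from bs4 import BeautifulSoup

domain = "example.com"
html = BeautifulSoup("<td>mail.example.com</td><td>other.org</td>", "html.parser")
escaped_domain = re.escape(domain)
match_pattern = re.compile(r"^[\w\.-]+\." + escaped_domain + r"$")
for subdomain in html.findAll(text=match_pattern):
    print(subdomain)  # prints "mail.example.com"; "other.org" does not match
```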
8 changes: 2 additions & 6 deletions bbot/modules/newsletters.py
@@ -2,12 +2,10 @@
# thanks to BBOT's sub-domain enumeration) looking for the presence of an 'email type' that also
# contains a 'placeholder'. The combination of these two HTML items usually signify the presence
# of an "Enter Your Email Here" type Newsletter Subscription service. This module could be used
-# to find newsletters for a future email bombing attack and/or find user-input fields that could
-# be be susceptible to overflows or injections.
+# to find newsletters for a future email bombing attack.

from .base import BaseModule
import re
-from bs4 import BeautifulSoup

# Known Websites with Newsletters
# https://futureparty.com/
@@ -16,8 +14,6 @@
# https://www.milkkarten.net/
# https://geekout.mattnavarra.com/

-deps_pip = ["beautifulsoup4"]


class newsletters(BaseModule):
watched_events = ["HTTP_RESPONSE"]
@@ -37,7 +33,7 @@ def find_type(self, soup):

async def handle_event(self, event):
if event.data["status_code"] == 200:
-soup = BeautifulSoup(event.data["body"], "html.parser")
+soup = self.helpers.beautifulsoup(event.data["body"], "html.parser")
result = self.find_type(soup)
if result:
description = f"Found a Newsletter Submission Form that could be used for email bombing attacks"
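The diff shows `find_type`'s signature but not its body. Based on the module comment (an email-type input that also carries a placeholder), a hypothetical reconstruction of that check:

```python
# Hypothetical sketch of find_type; the actual implementation is not shown
# in this diff. Flags an <input type="email"> that also has a placeholder.
def find_type(self, soup):
    email_input = soup.find(type="email")  # first email-type element, or None
    if email_input and email_input.get("placeholder"):
        return True
    return False
```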
3 changes: 1 addition & 2 deletions bbot/modules/viewdns.py
@@ -37,9 +37,8 @@ async def query(self, query):
self.verbose(f"Error retrieving reverse whois results (status code: {status_code})")

content = getattr(r, "content", b"")
-from bs4 import BeautifulSoup

-html = BeautifulSoup(content, "html.parser")
+html = self.helpers.beautifulsoup(content, "html.parser")
found = set()
for table_row in html.findAll("tr"):
table_cells = table_row.findAll("td")
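The surrounding loop (unchanged here) walks the reverse-whois results table row by row; the cell handling after `findAll("td")` is truncated in this diff, so collecting the first cell below is illustrative:

```python
# Self-contained sketch of the table walk in viewdns.py, with illustrative
# markup in place of the viewdns.info response.
from bs4 import BeautifulSoup

content = b"<table><tr><td>example.com</td><td>2024-01-01</td></tr></table>"
html = BeautifulSoup(content, "html.parser")
found = set()
for table_row in html.findAll("tr"):
    table_cells = table_row.findAll("td")
    if table_cells:
        found.add(table_cells[0].text.strip())
print(found)  # {'example.com'}
```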
24 changes: 24 additions & 0 deletions bbot/test/test_step_1/test_web.py
@@ -45,6 +45,30 @@ async def test_web_helpers(bbot_scanner, bbot_config, bbot_httpserver):
assert filename2.is_file()
with open(filename2) as f:
assert f.read() == download_content

+# beautifulsoup
+download_content = """
+<div>
+    <h1>Example Domain</h1>
+    <p>This domain is for use in illustrative examples in documents. You may use this
+    domain in literature without prior coordination or asking for permission.</p>
+    <p><a href="https://www.iana.org/domains/example">More information...</a></p>
+</div>
+"""

+path = "/test_http_helpers_beautifulsoup"
+url = bbot_httpserver.url_for(path)
+bbot_httpserver.expect_request(uri=path).respond_with_data(download_content, status=200)
+webpage = await scan1.helpers.request(url)
+assert webpage, f"Webpage is False"
+soup = scan1.helpers.beautifulsoup(webpage, "html.parser")
+assert soup, f"Soup is False"
+# pretty_print = soup.prettify()
+# assert pretty_print, f"PrettyPrint is False"
+# scan1.helpers.log.info(f"{pretty_print}")
+html_text = soup.find(text="Example Domain")
+assert html_text, f"Find HTML Text is False"

# 404
path = "/test_http_helpers_download_404"
url = bbot_httpserver.url_for(path)
1 change: 0 additions & 1 deletion docs/contribution.md
@@ -134,7 +134,6 @@ BBOT automates module dependencies with **Ansible**. If your module relies on a
```python
class MyModule(BaseModule):
...
-    deps_pip = ["beautifulsoup4"]
deps_apt = ["chromium-browser"]
deps_ansible = [
{
Expand Down
14 changes: 12 additions & 2 deletions poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
@@ -47,6 +47,7 @@ httpx = "^0.26.0"
cloudcheck = "^2.1.0.181"
tldextract = "^5.1.1"
cachetools = "^5.3.2"
+socksio = "^1.0.0"

[tool.poetry.group.dev.dependencies]
flake8 = "^6.0.0"
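`socksio` is the backend httpx uses for SOCKS proxy support, and it is the source of the `SOCKSError` now caught in `_acatch`. A minimal sketch, assuming a SOCKS5 proxy listening at 127.0.0.1:1080:

```python
# Minimal sketch of httpx with a SOCKS proxy; assumes a SOCKS5 proxy at
# 127.0.0.1:1080. socksio must be installed for httpx's SOCKS transport.
import asyncio
import httpx

async def main():
    async with httpx.AsyncClient(proxy="socks5://127.0.0.1:1080") as client:
        response = await client.get("https://example.com")
        print(response.status_code)

asyncio.run(main())
```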
