Commit
improve ssl disablement, filedownload improvements
TheTechromancer committed Sep 21, 2023
1 parent 4bc355e commit 8e9ede2
Showing 3 changed files with 92 additions and 52 deletions.
111 changes: 71 additions & 40 deletions bbot/core/helpers/web.py
@@ -7,6 +7,7 @@
import traceback
from pathlib import Path
from bs4 import BeautifulSoup
from contextlib import asynccontextmanager

from httpx._models import Cookies

@@ -137,7 +138,11 @@ def __init__(self, parent_helper):
def AsyncClient(self, *args, **kwargs):
kwargs["_bbot_scan"] = self.parent_helper.scan
retries = kwargs.pop("retries", self.parent_helper.config.get("http_retries", 1))
kwargs["transport"] = httpx.AsyncHTTPTransport(retries=retries, verify=self.ssl_verify)
transport = httpx.AsyncHTTPTransport(retries=retries, verify=self.ssl_verify)
if not self.ssl_verify:
# if we don't want to verify cert validity, we REALLY don't want to verify.
transport._pool._ssl_context = self.ssl_context_noverify()
kwargs["transport"] = transport
kwargs["verify"] = self.ssl_verify
return BBOTAsyncClient(*args, **kwargs)
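
A note on the transport patch above: when httpx is given `verify=False` it still builds its own SSL context, which rejects legacy renegotiation and SECLEVEL-0 ciphers, so the commit overwrites the private `transport._pool._ssl_context` with the fully permissive context from `ssl_context_noverify()` (added later in this file). A minimal sketch of the same effect through httpx's public API, which accepts a pre-built `ssl.SSLContext` as `verify` (illustrative only, not part of the commit):

```python
import ssl
import httpx

def ssl_context_noverify():
    # Mirror of the helper added in this commit: accept any certificate,
    # any protocol version, and legacy/weak ciphers.
    ctx = ssl.create_default_context()
    ctx.check_hostname = False
    ctx.verify_mode = ssl.CERT_NONE
    ctx.set_ciphers("ALL:@SECLEVEL=0")
    ctx.options |= 0x4  # OP_LEGACY_SERVER_CONNECT
    return ctx

transport = httpx.AsyncHTTPTransport(retries=1, verify=ssl_context_noverify())
client = httpx.AsyncClient(transport=transport)
```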

@@ -216,7 +221,7 @@ async def request(self, *args, **kwargs):
if client_kwargs:
client = self.AsyncClient(**client_kwargs)

try:
async with self._acatch(url, raise_error):
if self.http_debug:
logstr = f"Web request: {str(args)}, {str(kwargs)}"
log.debug(logstr)
@@ -226,41 +231,6 @@
f"Web response from {url}: {response} (Length: {len(response.content)}) headers: {response.headers}"
)
return response
except httpx.PoolTimeout:
# this block exists because of this:
# https://github.com/encode/httpcore/discussions/783
log.verbose(f"PoolTimeout to URL: {url}")
self.web_client = self.AsyncClient(persist_cookies=False)
return await self.request(*args, **kwargs)
except httpx.TimeoutException:
log.verbose(f"HTTP timeout to URL: {url}")
if raise_error:
raise
except httpx.ConnectError:
log.verbose(f"HTTP connect failed to URL: {url}")
if raise_error:
raise
except httpx.RequestError as e:
log.trace(f"Error with request to URL: {url}: {e}")
log.trace(traceback.format_exc())
if raise_error:
raise
except ssl.SSLError as e:
msg = f"SSL error with request to URL: {url}: {e}"
log.trace(msg)
log.trace(traceback.format_exc())
if raise_error:
raise httpx.RequestError(msg)
except anyio.EndOfStream as e:
msg = f"AnyIO error with request to URL: {url}: {e}"
log.trace(msg)
log.trace(traceback.format_exc())
if raise_error:
raise httpx.RequestError(msg)
except BaseException as e:
log.trace(f"Unhandled exception with request to URL: {url}: {e}")
log.trace(traceback.format_exc())
raise

async def download(self, url, **kwargs):
"""
@@ -276,6 +246,7 @@ async def download(self, url, **kwargs):
cache_hrs (float, optional): The number of hours to cache the downloaded file.
A negative value disables caching. Defaults to -1.
method (str, optional): The HTTP method to use for the request, defaults to 'GET'.
raise_error (bool, optional): Whether to raise exceptions on HTTP errors such as connect failures and timeouts. Defaults to False.
**kwargs: Additional keyword arguments to pass to the httpx request.
Returns:
@@ -286,23 +257,27 @@
"""
success = False
filename = kwargs.pop("filename", self.parent_helper.cache_filename(url))
follow_redirects = kwargs.pop("follow_redirects", True)
max_size = kwargs.pop("max_size", None)
warn = kwargs.pop("warn", True)
raise_error = kwargs.pop("raise_error", False)
if max_size is not None:
max_size = self.parent_helper.human_to_bytes(max_size)
cache_hrs = float(kwargs.pop("cache_hrs", -1))
total_size = 0
chunk_size = 8192
log.debug(f"Downloading file from {url} with cache_hrs={cache_hrs}")
log.hugesuccess(f"Downloading file from {url} with cache_hrs={cache_hrs}")
if cache_hrs > 0 and self.parent_helper.is_cached(url):
log.debug(f"{url} is cached at {self.parent_helper.cache_filename(url)}")
success = True
else:
kwargs["follow_redirects"] = follow_redirects
if not "method" in kwargs:
kwargs["method"] = "GET"
try:
async with self.AsyncClient().stream(url=url, **kwargs) as response:
async with self._acatch(url, raise_error), self.AsyncClient().stream(url=url, **kwargs) as response:
status_code = getattr(response, "status_code", 0)
log.debug(f"Download result: HTTP {status_code}")
if status_code != 0:
@@ -320,7 +295,10 @@
f.write(chunk)
success = True
except httpx.HTTPError as e:
log.warning(f"Failed to download {url}: {e}")
log_fn = log.verbose
if warn:
log_fn = log.warning
log_fn(f"Failed to download {url}: {e}")
return

if success:
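
The download path above gains three caller-facing knobs: `warn` (demote failures from a warning to verbose logging), `raise_error` (propagate errors through `_acatch`), and an explicit `follow_redirects` pop. A hypothetical module method exercising them, assuming the helper returns the saved path on success (consistent with the truthiness check in filedownload.py below):

```python
# Hypothetical module code, not from the commit:
async def handle_event(self, event):
    filepath = await self.helpers.download(
        event.data,        # the URL to fetch
        warn=False,        # failures log at verbose instead of warning
        max_size="1MB",    # converted via human_to_bytes before streaming
        cache_hrs=24,      # reuse a cached copy for up to a day
    )
    if filepath:
        self.info(f"saved to {filepath}")
```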
@@ -588,6 +566,59 @@ def is_spider_danger(self, source_event, url):
return True
return False

def ssl_context_noverify(self):
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
ssl_context.options &= ~ssl.OP_NO_SSLv2 & ~ssl.OP_NO_SSLv3
ssl_context.set_ciphers("ALL:@SECLEVEL=0")
ssl_context.options |= 0x4 # Add the OP_LEGACY_SERVER_CONNECT option
return ssl_context
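
A sketch of how such a permissive context can be exercised outside httpx, loosely mirroring what sslcert.py below does when pulling certificates from hosts the default context would reject (hypothetical helper, not from the commit):

```python
import asyncio
import ssl

async def grab_cert_der(host: str, port: int, ctx: ssl.SSLContext) -> bytes:
    # Open a TLS connection with the permissive context and return
    # the peer certificate in DER form.
    reader, writer = await asyncio.open_connection(host, port, ssl=ctx)
    ssl_object = writer.get_extra_info("ssl_object")
    der = ssl_object.getpeercert(binary_form=True)
    writer.close()
    await writer.wait_closed()
    return der
```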

@asynccontextmanager
async def _acatch(self, url, raise_error):
"""
Asynchronous context manager to handle various httpx errors during a request.
Yields:
None
Note:
This function is internal and should generally not be used directly.
`url` and `raise_error` are supplied by the caller and should match the request being wrapped.
"""
try:
yield
except httpx.TimeoutException:
log.verbose(f"HTTP timeout to URL: {url}")
if raise_error:
raise
except httpx.ConnectError:
log.verbose(f"HTTP connect failed to URL: {url}")
if raise_error:
raise
except httpx.RequestError as e:
log.trace(f"Error with request to URL: {url}: {e}")
log.trace(traceback.format_exc())
if raise_error:
raise
except ssl.SSLError as e:
msg = f"SSL error with request to URL: {url}: {e}"
log.trace(msg)
log.trace(traceback.format_exc())
if raise_error:
raise httpx.RequestError(msg)
except anyio.EndOfStream as e:
msg = f"AnyIO error with request to URL: {url}: {e}"
log.trace(msg)
log.trace(traceback.format_exc())
if raise_error:
raise httpx.RequestError(msg)
except BaseException as e:
log.trace(f"Unhandled exception with request to URL: {url}: {e}")
log.trace(traceback.format_exc())
raise
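
The context-manager refactor is what lets `request()` and the streaming path in `download()` share one error-handling block: an `@asynccontextmanager` can wrap both a plain `await` and an `async with client.stream(...)` body. A stripped-down sketch of the pattern (illustrative only):

```python
from contextlib import asynccontextmanager

@asynccontextmanager
async def acatch(url, raise_error):
    # Reusable async error trap: callers wrap any request in
    # `async with acatch(url, raise_error): ...`
    try:
        yield
    except Exception as e:
        print(f"request to {url} failed: {e}")
        if raise_error:
            raise

async def fetch(client, url):
    async with acatch(url, raise_error=False):
        return await client.get(url)
```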


user_keywords = [re.compile(r, re.I) for r in ["user", "login", "email"]]
pass_keywords = [re.compile(r, re.I) for r in ["pass"]]
25 changes: 20 additions & 5 deletions bbot/modules/filedownload.py
@@ -87,20 +87,35 @@ async def setup(self):
self.download_dir = self.scan.home / "filedownload"
self.helpers.mkdir(self.download_dir)
self.files_downloaded = 0
self.seen = set()
# https://raw.githubusercontent.com/jshttp/mime-db/master/db.json
return True

async def filter_event(self, event):
h = hash(event.data)
if h in self.seen:
return False, f"Already processed {event}"
self.seen.add(h)
return True

async def handle_event(self, event):
url_lower = event.data.lower()
if any(url_lower.endswith(f".{e}") for e in self.extensions):
timestamp = self.helpers.make_date(event.timestamp)
filepath = Path(event.parsed.path)
filename_stem = self.helpers.tagify(filepath.stem)
filename = f"{timestamp}_{filename_stem}{filepath.suffix}"
split_url = url_lower.rsplit(".", 1)
url_stem = split_url[0]
filename = f"{timestamp}_{self.helpers.tagify(url_stem)}"
if len(split_url) == 2:
filename = f"{filename}.{split_url[-1]}"
file_destination = self.download_dir / filename
base_url = f"{event.parsed.scheme}://{event.parsed.netloc}"
self.info(f'Found "{filepath.name}" at "{base_url}", downloading to {file_destination}')
await self.helpers.download(event.data, filename=file_destination, max_size=self.max_filesize)
self.files_downloaded += 1
result = await self.helpers.download(
event.data, warn=False, filename=file_destination, max_size=self.max_filesize
)
if result:
self.info(f'Found "{filepath.name}" at "{base_url}", downloaded to {file_destination}')
self.files_downloaded += 1
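
The renaming scheme now slugs the entire URL (scheme, host, and path) into the filename rather than just the path stem, so same-named files from different hosts no longer collide. A worked example, assuming `tagify` slugifies to lowercase alphanumerics and dashes (an assumption about that helper's behavior):

```python
# Hypothetical walk-through of the new naming logic:
url_lower = "https://evilcorp.com/reports/q3.pdf"
url_stem, ext = url_lower.rsplit(".", 1)
# url_stem -> "https://evilcorp.com/reports/q3", ext -> "pdf"
# tagify(url_stem) -> "https-evilcorp-com-reports-q3" (assumed slug form)
# final filename -> f"{timestamp}_https-evilcorp-com-reports-q3.pdf"
```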

async def report(self):
if self.files_downloaded > 0:
8 changes: 1 addition & 7 deletions bbot/modules/sslcert.py
@@ -1,4 +1,3 @@
import ssl
import asyncio
from OpenSSL import crypto
from contextlib import suppress
@@ -109,12 +108,7 @@ async def visit_host(self, host, port):

# Create an SSL context
try:
ssl_context = ssl.create_default_context()
ssl_context.check_hostname = False
ssl_context.verify_mode = ssl.CERT_NONE
ssl_context.options &= ~ssl.OP_NO_SSLv2 & ~ssl.OP_NO_SSLv3
ssl_context.set_ciphers("ALL:@SECLEVEL=0")
ssl_context.options |= 0x4 # Add the OP_LEGACY_SERVER_CONNECT option
ssl_context = self.helpers.ssl_context_noverify()
except Exception as e:
self.warning(f"Error creating SSL context: {e}")
return [], [], (host, port)
