Skip to content

Commit

Permalink
Merge pull request #889 from blacklanternsecurity/finding-uri-fix-2
Browse files Browse the repository at this point in the history
Better handling of non-HTTP URIs
  • Loading branch information
TheTechromancer authored Dec 21, 2023
2 parents 61c00a3 + 73d309f commit 2ffa75c
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 22 deletions.
10 changes: 10 additions & 0 deletions bbot/core/helpers/async_helpers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import uuid
import random
import asyncio
import logging
import threading
Expand All @@ -12,6 +13,15 @@
from .cache import CacheDict


class ShuffleQueue(asyncio.Queue):
    """An asyncio queue that stores items in random order.

    Each enqueued item is inserted at a uniformly random position in the
    underlying deque, so consumers receive items in shuffled order rather
    than FIFO. Dequeueing still pops from the left end.
    """

    def _put(self, item):
        # Pick a slot anywhere from the front (0) through one past the
        # current tail (qsize), so append-at-end is also possible.
        slot = random.randint(0, len(self._queue))
        self._queue.insert(slot, item)

    def _get(self):
        # Consume from the head, same as the stock Queue implementation.
        return self._queue.popleft()


class _Lock(asyncio.Lock):
def __init__(self, name):
self.name = name
Expand Down
6 changes: 3 additions & 3 deletions bbot/modules/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from ..core.helpers.misc import get_size # noqa
from ..core.errors import ValidationError
from ..core.helpers.async_helpers import TaskCounter
from ..core.helpers.async_helpers import TaskCounter, ShuffleQueue


class BaseModule:
Expand Down Expand Up @@ -1067,13 +1067,13 @@ def config(self):
@property
def incoming_event_queue(self):
if self._incoming_event_queue is None:
self._incoming_event_queue = asyncio.PriorityQueue()
self._incoming_event_queue = ShuffleQueue()
return self._incoming_event_queue

@property
def outgoing_event_queue(self):
if self._outgoing_event_queue is None:
self._outgoing_event_queue = asyncio.PriorityQueue()
self._outgoing_event_queue = ShuffleQueue()
return self._outgoing_event_queue

@property
Expand Down
14 changes: 8 additions & 6 deletions bbot/modules/internal/excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ def report(self, result, name, event, **kwargs):
host, port = self.excavate.helpers.split_host_port(parsed_uri.netloc)
# Handle non-HTTP URIs (ftp, s3, etc.)
if not "http" in parsed_uri.scheme.lower():
# these findings are pretty mundane so don't bother with them if they aren't in scope
abort_if = lambda e: e.scope_distance > 0
event_data = {"host": str(host), "description": f"Non-HTTP URI: {result}"}
parsed_url = getattr(event, "parsed", None)
if parsed_url:
Expand All @@ -157,11 +159,16 @@ def report(self, result, name, event, **kwargs):
event_data,
"FINDING",
source=event,
abort_if=abort_if,
)
protocol_data = {"protocol": parsed_uri.scheme, "host": str(host)}
if port:
protocol_data["port"] = port
self.excavate.emit_event(
{"protocol": parsed_uri.scheme, "host": str(host)},
protocol_data,
"PROTOCOL",
source=event,
abort_if=abort_if,
)
return

Expand Down Expand Up @@ -340,7 +347,6 @@ async def handle_event(self, event):
web_spider_distance = getattr(event, "web_spider_distance", 0)
num_redirects = max(getattr(event, "num_redirects", 0), web_spider_distance)
location = event.data.get("location", "")
host = event.host
# if it's a redirect
if location:
# get the url scheme
Expand All @@ -361,10 +367,6 @@ async def handle_event(self, event):
self.emit_event(url_event)
else:
self.verbose(f"Exceeded max HTTP redirects ({self.max_redirects}): {location}")
elif scheme:
# we ran into a scheme that's not HTTP or HTTPS
data = {"host": host, "description": f"Non-standard URI scheme: {scheme}://", "url": location}
self.emit_event(data, "FINDING", event)

body = self.helpers.recursive_decode(event.data.get("body", ""))
# Cloud extractors
Expand Down
13 changes: 3 additions & 10 deletions bbot/scanner/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from contextlib import suppress

from ..core.errors import ValidationError
from ..core.helpers.async_helpers import TaskCounter
from ..core.helpers.async_helpers import TaskCounter, ShuffleQueue

log = logging.getLogger("bbot.scanner.manager")

Expand All @@ -18,7 +18,7 @@ class ScanManager:
Attributes:
scan (Scan): Reference to the Scan object that instantiated the ScanManager.
incoming_event_queue (asyncio.PriorityQueue): Queue storing incoming events for processing.
incoming_event_queue (ShuffleQueue): Queue storing incoming events for processing.
events_distributed (set): Set tracking globally unique events.
events_accepted (set): Set tracking events accepted by individual modules.
dns_resolution (bool): Flag to enable or disable DNS resolution.
Expand All @@ -39,14 +39,7 @@ def __init__(self, scan):

self.scan = scan

# TODO: consider reworking modules' dedupe policy (accept_dupes)
# by creating a function that decides the criteria for what is
# considered to be a duplicate (by default this would be a simple
# hash(event)), but allowing each module to override it if needed.
# If a module used the default function, its dedupe could be done
# at the manager level to save memory. If not, it would be done by the scan.

self.incoming_event_queue = asyncio.PriorityQueue()
self.incoming_event_queue = ShuffleQueue()
# track incoming duplicates module-by-module (for `suppress_dupes` attribute of modules)
self.incoming_dup_tracker = set()
# track outgoing duplicates (for `accept_dupes` attribute of modules)
Expand Down
54 changes: 51 additions & 3 deletions bbot/test/test_step_2/module_tests/test_module_excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def check(self, module_test, events):


class TestExcavateRedirect(TestExcavate):
targets = ["http://127.0.0.1:8888/", "http://127.0.0.1:8888/relative/"]
targets = ["http://127.0.0.1:8888/", "http://127.0.0.1:8888/relative/", "http://127.0.0.1:8888/nonhttpredirect/"]
config_overrides = {"scope_report_distance": 1}

async def setup_before_prep(self, module_test):
Expand All @@ -161,11 +161,59 @@ async def setup_before_prep(self, module_test):
module_test.httpserver.expect_request("/relative/").respond_with_data(
"", status=302, headers={"Location": "./owa/"}
)
module_test.httpserver.expect_request("/relative/owa/").respond_with_data(
"ftp://127.0.0.1:2121\nsmb://127.0.0.1\nssh://127.0.0.2"
)
module_test.httpserver.expect_request("/nonhttpredirect/").respond_with_data(
"", status=302, headers={"Location": "awb://127.0.0.1:7777"}
)
module_test.httpserver.no_handler_status_code = 404

def check(self, module_test, events):
assert any(e.data == "https://www.test.notreal/yep" for e in events)
assert any(e.data == "http://127.0.0.1:8888/relative/owa/" for e in events)
assert 1 == len(
[
e
for e in events
if e.type == "URL_UNVERIFIED" and e.data == "https://www.test.notreal/yep" and e.scope_distance == 1
]
)
assert 1 == len([e for e in events if e.type == "URL" and e.data == "http://127.0.0.1:8888/relative/owa/"])
assert 1 == len(
[
e
for e in events
if e.type == "FINDING" and e.data["description"] == "Non-HTTP URI: awb://127.0.0.1:7777"
]
)
assert 1 == len(
[
e
for e in events
if e.type == "PROTOCOL" and e.data["protocol"] == "AWB" and e.data.get("port", 0) == 7777
]
)
assert 1 == len(
[
e
for e in events
if e.type == "FINDING" and e.data["description"] == "Non-HTTP URI: ftp://127.0.0.1:2121"
]
)
assert 1 == len(
[
e
for e in events
if e.type == "PROTOCOL" and e.data["protocol"] == "FTP" and e.data.get("port", 0) == 2121
]
)
assert 1 == len(
[e for e in events if e.type == "FINDING" and e.data["description"] == "Non-HTTP URI: smb://127.0.0.1"]
)
assert 1 == len(
[e for e in events if e.type == "PROTOCOL" and e.data["protocol"] == "SMB" and not "port" in e.data]
)
assert 0 == len([e for e in events if e.type == "FINDING" and "ssh://127.0.0.1" in e.data["description"]])
assert 0 == len([e for e in events if e.type == "PROTOCOL" and e.data["protocol"] == "SSH"])


class TestExcavateMaxLinksPerPage(TestExcavate):
Expand Down

0 comments on commit 2ffa75c

Please sign in to comment.