Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better handling of non-HTTP URIs #889

Merged
merged 5 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions bbot/core/helpers/async_helpers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import uuid
import random
import asyncio
import logging
import threading
Expand All @@ -12,6 +13,15 @@
from .cache import CacheDict


class ShuffleQueue(asyncio.Queue):
    """An ``asyncio.Queue`` that hands items back in randomized order.

    The shuffling happens at insertion time: each new item is placed at a
    uniformly random position in the underlying container, and retrieval
    always takes the item currently at the front.
    """

    def _put(self, item):
        """Insert *item* at a random position among the queued items."""
        # Valid insertion slots run from 0 (front) through the current
        # length (back), inclusive, hence the ``+ 1``.
        slot = random.randrange(len(self._queue) + 1)
        self._queue.insert(slot, item)

    def _get(self):
        """Remove and return the item at the front of the queue."""
        front_item = self._queue.popleft()
        return front_item


class _Lock(asyncio.Lock):
def __init__(self, name):
self.name = name
Expand Down
6 changes: 3 additions & 3 deletions bbot/modules/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from ..core.helpers.misc import get_size # noqa
from ..core.errors import ValidationError
from ..core.helpers.async_helpers import TaskCounter
from ..core.helpers.async_helpers import TaskCounter, ShuffleQueue


class BaseModule:
Expand Down Expand Up @@ -1065,13 +1065,13 @@ def config(self):
@property
def incoming_event_queue(self):
if self._incoming_event_queue is None:
self._incoming_event_queue = asyncio.PriorityQueue()
self._incoming_event_queue = ShuffleQueue()
return self._incoming_event_queue

@property
def outgoing_event_queue(self):
if self._outgoing_event_queue is None:
self._outgoing_event_queue = asyncio.PriorityQueue()
self._outgoing_event_queue = ShuffleQueue()
return self._outgoing_event_queue

@property
Expand Down
14 changes: 8 additions & 6 deletions bbot/modules/internal/excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ def report(self, result, name, event, **kwargs):
host, port = self.excavate.helpers.split_host_port(parsed_uri.netloc)
# Handle non-HTTP URIs (ftp, s3, etc.)
if not "http" in parsed_uri.scheme.lower():
# these findings are pretty mundane so don't bother with them if they aren't in scope
abort_if = lambda e: e.scope_distance > 0
event_data = {"host": str(host), "description": f"Non-HTTP URI: {result}"}
parsed_url = getattr(event, "parsed", None)
if parsed_url:
Expand All @@ -157,11 +159,16 @@ def report(self, result, name, event, **kwargs):
event_data,
"FINDING",
source=event,
abort_if=abort_if,
)
protocol_data = {"protocol": parsed_uri.scheme, "host": str(host)}
if port:
protocol_data["port"] = port
self.excavate.emit_event(
{"protocol": parsed_uri.scheme, "host": str(host)},
protocol_data,
"PROTOCOL",
source=event,
abort_if=abort_if,
)
return

Expand Down Expand Up @@ -340,7 +347,6 @@ async def handle_event(self, event):
web_spider_distance = getattr(event, "web_spider_distance", 0)
num_redirects = max(getattr(event, "num_redirects", 0), web_spider_distance)
location = event.data.get("location", "")
host = event.host
# if it's a redirect
if location:
# get the url scheme
Expand All @@ -361,10 +367,6 @@ async def handle_event(self, event):
self.emit_event(url_event)
else:
self.verbose(f"Exceeded max HTTP redirects ({self.max_redirects}): {location}")
elif scheme:
# we ran into a scheme that's not HTTP or HTTPS
data = {"host": host, "description": f"Non-standard URI scheme: {scheme}://", "url": location}
self.emit_event(data, "FINDING", event)

body = self.helpers.recursive_decode(event.data.get("body", ""))
# Cloud extractors
Expand Down
13 changes: 3 additions & 10 deletions bbot/scanner/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from contextlib import suppress

from ..core.errors import ValidationError
from ..core.helpers.async_helpers import TaskCounter
from ..core.helpers.async_helpers import TaskCounter, ShuffleQueue

log = logging.getLogger("bbot.scanner.manager")

Expand All @@ -18,7 +18,7 @@ class ScanManager:

Attributes:
scan (Scan): Reference to the Scan object that instantiated the ScanManager.
incoming_event_queue (asyncio.PriorityQueue): Queue storing incoming events for processing.
incoming_event_queue (ShuffleQueue): Queue storing incoming events for processing.
events_distributed (set): Set tracking globally unique events.
events_accepted (set): Set tracking events accepted by individual modules.
dns_resolution (bool): Flag to enable or disable DNS resolution.
Expand All @@ -39,14 +39,7 @@ def __init__(self, scan):

self.scan = scan

# TODO: consider reworking modules' dedupe policy (accept_dupes)
# by creating a function that decides the criteria for what is
# considered to be a duplicate (by default this would be a simple
# hash(event)), but allowing each module to override it if needed.
# If a module used the default function, its dedupe could be done
# at the manager level to save memory. If not, it would be done by the scan.

self.incoming_event_queue = asyncio.PriorityQueue()
self.incoming_event_queue = ShuffleQueue()
# track incoming duplicates module-by-module (for `suppress_dupes` attribute of modules)
self.incoming_dup_tracker = set()
# track outgoing duplicates (for `accept_dupes` attribute of modules)
Expand Down
54 changes: 51 additions & 3 deletions bbot/test/test_step_2/module_tests/test_module_excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def check(self, module_test, events):


class TestExcavateRedirect(TestExcavate):
targets = ["http://127.0.0.1:8888/", "http://127.0.0.1:8888/relative/"]
targets = ["http://127.0.0.1:8888/", "http://127.0.0.1:8888/relative/", "http://127.0.0.1:8888/nonhttpredirect/"]
config_overrides = {"scope_report_distance": 1}

async def setup_before_prep(self, module_test):
Expand All @@ -161,11 +161,59 @@ async def setup_before_prep(self, module_test):
module_test.httpserver.expect_request("/relative/").respond_with_data(
"", status=302, headers={"Location": "./owa/"}
)
module_test.httpserver.expect_request("/relative/owa/").respond_with_data(
"ftp://127.0.0.1:2121\nsmb://127.0.0.1\nssh://127.0.0.2"
)
module_test.httpserver.expect_request("/nonhttpredirect/").respond_with_data(
"", status=302, headers={"Location": "awb://127.0.0.1:7777"}
)
module_test.httpserver.no_handler_status_code = 404

def check(self, module_test, events):
assert any(e.data == "https://www.test.notreal/yep" for e in events)
assert any(e.data == "http://127.0.0.1:8888/relative/owa/" for e in events)
assert 1 == len(
[
e
for e in events
if e.type == "URL_UNVERIFIED" and e.data == "https://www.test.notreal/yep" and e.scope_distance == 1
]
)
assert 1 == len([e for e in events if e.type == "URL" and e.data == "http://127.0.0.1:8888/relative/owa/"])
assert 1 == len(
[
e
for e in events
if e.type == "FINDING" and e.data["description"] == "Non-HTTP URI: awb://127.0.0.1:7777"
]
)
assert 1 == len(
[
e
for e in events
if e.type == "PROTOCOL" and e.data["protocol"] == "AWB" and e.data.get("port", 0) == 7777
]
)
assert 1 == len(
[
e
for e in events
if e.type == "FINDING" and e.data["description"] == "Non-HTTP URI: ftp://127.0.0.1:2121"
]
)
assert 1 == len(
[
e
for e in events
if e.type == "PROTOCOL" and e.data["protocol"] == "FTP" and e.data.get("port", 0) == 2121
]
)
assert 1 == len(
[e for e in events if e.type == "FINDING" and e.data["description"] == "Non-HTTP URI: smb://127.0.0.1"]
)
assert 1 == len(
[e for e in events if e.type == "PROTOCOL" and e.data["protocol"] == "SMB" and not "port" in e.data]
)
assert 0 == len([e for e in events if e.type == "FINDING" and "ssh://127.0.0.1" in e.data["description"]])
assert 0 == len([e for e in events if e.type == "PROTOCOL" and e.data["protocol"] == "SSH"])


class TestExcavateMaxLinksPerPage(TestExcavate):
Expand Down