Skip to content

Commit

Permalink
steady work on scope shepherding
Browse files Browse the repository at this point in the history
  • Loading branch information
TheTechromancer committed Oct 7, 2023
1 parent 90d4d1d commit b8cfdd2
Show file tree
Hide file tree
Showing 14 changed files with 909 additions and 188 deletions.
90 changes: 15 additions & 75 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,6 @@ def __init__(
self._module_priority = None
self._resolved_hosts = set()

self._made_internal = False
# whether to force-output this event to the graph
self._graph_important = False
# keep track of whether this event has been recorded by the scan
self._stats_recorded = False

Expand Down Expand Up @@ -292,7 +289,9 @@ def remove_tag(self, tag):

@property
def always_emit(self):
    """
    Whether this event should always be emitted, regardless of dedup/scope rules.

    True when the event was created with the always-emit flag, carries one of the
    always-emit tags, or has no host information (and therefore cannot be
    scope-checked meaningfully).
    """
    # NOTE: the diff span previously contained both the old one-line return and
    # this new implementation; only the new implementation is kept.
    always_emit_tags = any(t in self.tags for t in self._always_emit_tags)
    no_host_information = not bool(self.host)
    return self._always_emit or always_emit_tags or no_host_information

@property
def id(self):
Expand Down Expand Up @@ -320,6 +319,10 @@ def scope_distance(self, scope_distance):
The method will automatically update the relevant 'distance-' tags associated with the event.
"""
if scope_distance >= 0:
if scope_distance == 0:
self.add_tag("in-scope")
else:
self.remove_tag("in-scope")
new_scope_distance = None
# ensure scope distance does not increase (only allow setting to smaller values)
if self.scope_distance == -1:
Expand All @@ -332,6 +335,10 @@ def scope_distance(self, scope_distance):
if t.startswith("distance-"):
self.remove_tag(t)
self.add_tag(f"distance-{new_scope_distance}")
# apply recursively to parent events
source_scope_distance = getattr(self.source, "scope_distance", -1)
if source_scope_distance >= 0 and self != self.source:
self.source.scope_distance = scope_distance + 1

@property
def source(self):
Expand Down Expand Up @@ -411,75 +418,8 @@ def make_internal(self):
The purpose of internal events is to enable speculative/explorative discovery without cluttering
the console with irrelevant or uninteresting events.
"""
if not self._made_internal:
self._internal = True
self.add_tag("internal")
self._made_internal = True

def unmake_internal(self, set_scope_distance=None, graph_important=False):
"""
Reverts the event from being internal, optionally forcing it to be included in output and setting its scope distance.
Removes the 'internal' tag, resets the `_internal` attribute, and adjusts scope distance if specified.
Optionally, forces the event to be included in the output. Also, if any source events are internal, they
are also reverted recursively.
This typically happens in `ScanManager._emit_event()` if the event is determined to be interesting.
Parameters:
set_scope_distance (int, optional): If specified, sets the scope distance to this value.
graph_important (bool or str, optional): If True, forces the event to be included in output.
If set to "trail_only", only its source events are modified.
Returns:
list: A list of source events that were also reverted from being internal.
"""
source_trail = []
self.remove_tag("internal")
if self._made_internal:
if set_scope_distance is not None:
self.scope_distance = set_scope_distance
self._internal = False
self._made_internal = False
if graph_important is True:
self._graph_important = True
if graph_important == "trail_only":
graph_important = True

# if our source event is internal, unmake it too
if getattr(self.source, "_internal", False):
source_scope_distance = None
if set_scope_distance is not None:
source_scope_distance = set_scope_distance + 1
source_trail += self.source.unmake_internal(
set_scope_distance=source_scope_distance, graph_important=graph_important
)
source_trail.append(self.source)

return source_trail

def set_scope_distance(self, d=0):
"""
Sets the scope distance for the event and its parent events, while considering module-specific scoping rules.
Unmakes the event internal if needed and adjusts its scope distance. If the distance is set to 0,
adds the 'in-scope' tag to the event. Takes into account module-specific scoping preferences unless
the event type is "DNS_NAME".
Parameters:
d (int): The scope distance to set for this event.
Returns:
list: A list of parent events whose scope distance was also set.
"""
source_trail = []
# keep the event internal if the module requests so, unless it's a DNS_NAME
if getattr(self.module, "_scope_shepherding", True) or self.type in ("DNS_NAME",):
source_trail = self.unmake_internal(set_scope_distance=d, graph_important="trail_only")
self.scope_distance = d
if d == 0:
self.add_tag("in-scope")
return source_trail
self._internal = True
self.add_tag("internal")

def _host(self):
return ""
Expand Down Expand Up @@ -747,7 +687,7 @@ def __hash__(self):
def __str__(self):
max_event_len = 80
d = str(self.data)
return f'{self.type}("{d[:max_event_len]}{("..." if len(d) > max_event_len else "")}", module={self.module}, tags={self.tags})'
return f'{self.type}("{d[:max_event_len]}{("..." if len(d) > max_event_len else "")}", module={self.module}, tags={self.tags} graph_important={self._graph_important})'

def __repr__(self):
return str(self)
Expand Down Expand Up @@ -1225,7 +1165,7 @@ def make_event(
data.module = module
if source is not None:
data.source = source
if internal == True and not data._made_internal:
if internal == True:
data.make_internal()
event_type = data.type
return data
Expand Down
52 changes: 43 additions & 9 deletions bbot/core/helpers/dns.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import dns
import time
import asyncio
import logging
Expand Down Expand Up @@ -89,7 +90,7 @@ def __init__(self, parent_helper):
self.timeout = self.parent_helper.config.get("dns_timeout", 5)
self.retries = self.parent_helper.config.get("dns_retries", 1)
self.abort_threshold = self.parent_helper.config.get("dns_abort_threshold", 50)
self.max_dns_resolve_distance = self.parent_helper.config.get("max_dns_resolve_distance", 4)
self.max_dns_resolve_distance = self.parent_helper.config.get("max_dns_resolve_distance", 5)
self.resolver.timeout = self.timeout
self.resolver.lifetime = self.timeout
self._resolver_list = None
Expand Down Expand Up @@ -132,6 +133,10 @@ def __init__(self, parent_helper):
self._event_cache = self.parent_helper.CacheDict(max_size=10000)
self._event_cache_locks = NamedLock()

# for mocking DNS queries
self._orig_resolve_raw = None
self._mock_table = {}

# copy the system's current resolvers to a text file for tool use
self.system_resolvers = dns.resolver.Resolver().nameservers
self.resolver_file = self.parent_helper.tempfile(self.system_resolvers, pipe=False)
Expand Down Expand Up @@ -220,13 +225,7 @@ async def resolve_raw(self, query, **kwargs):
kwargs.pop("rdtype", None)
if "type" in kwargs:
t = kwargs.pop("type")
if isinstance(t, str):
if t.strip().lower() in ("any", "all", "*"):
types = self.all_rdtypes
else:
types = [t.strip().upper()]
elif any([isinstance(t, x) for x in (list, tuple)]):
types = [str(_).strip().upper() for _ in t]
types = self._parse_rdtype(t, default=types)
for t in types:
r, e = await self._resolve_hostname(query, rdtype=t, **kwargs)
if r:
Expand Down Expand Up @@ -500,7 +499,7 @@ async def resolve_event(self, event, minimal=False):
event_blacklisted = False

try:
if not event.host or event.type in ("IP_RANGE",):
if (not event.host) or (event.type in ("IP_RANGE",)):
return event_tags, event_whitelisted, event_blacklisted, dns_children

# lock to ensure resolution of the same host doesn't start while we're working here
Expand Down Expand Up @@ -1016,6 +1015,16 @@ async def _connectivity_check(self, interval=5):
self._errors.clear()
return False

def _parse_rdtype(self, t, default=None):
if isinstance(t, str):
if t.strip().lower() in ("any", "all", "*"):
return self.all_rdtypes
else:
return [t.strip().upper()]
elif any([isinstance(t, x) for x in (list, tuple)]):
return [str(_).strip().upper() for _ in t]
return default

def debug(self, *args, **kwargs):
    """Forward a message to the module-level debug log, but only when DNS debugging is enabled."""
    if not self._debug:
        return
    log.debug(*args, **kwargs)
Expand All @@ -1027,3 +1036,28 @@ def _get_dummy_module(self, name):
dummy_module = self.parent_helper._make_dummy_module(name=name, _type="DNS")
self._dummy_modules[name] = dummy_module
return dummy_module

def mock_dns(self, dns_dict):
    """
    Replace `resolve_raw()` with a mock that answers DNS queries from a static table.

    Used in tests to simulate DNS responses without network access. May be called
    multiple times; entries accumulate in `self._mock_table`.

    Parameters:
        dns_dict (dict): maps (query, rdtype) tuples to an answer string or a list
            of answer strings, e.g. {("example.com", "A"): ["127.0.0.1"]}.
    """
    # stash the real implementation so it can be restored later
    if self._orig_resolve_raw is None:
        self._orig_resolve_raw = self.resolve_raw

    async def mock_resolve_raw(query, **kwargs):
        results = []
        errors = []
        types = self._parse_rdtype(kwargs.get("type", ["A", "AAAA"]))
        for t in types:
            with suppress(KeyError):
                results += self._mock_table[(query, t)]
        return results, errors

    for (query, rdtype), answers in dns_dict.items():
        if isinstance(answers, str):
            answers = [answers]
        for answer in answers:
            rdata = dns.rdata.from_text("IN", rdtype, answer)
            # bugfix: the append branch previously stored (rdtype, rdata) while the
            # KeyError branch stored (rdtype, [rdata]) -- a shape that depended on
            # insertion order. Store one consistent shape via setdefault so mocked
            # entries always look like real resolve_raw() results.
            self._mock_table.setdefault((query, rdtype), []).append((rdtype, [rdata]))

    self.resolve_raw = mock_resolve_raw
68 changes: 58 additions & 10 deletions bbot/modules/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ def __init__(self, scan):
self._log = None
self._incoming_event_queue = None
self._outgoing_event_queue = None
# track incoming events to prevent unwanted duplicates
self._incoming_dup_tracker = set()
# track events that are critical to the graph
self._graph_important_tracker = set()
# seconds since we've submitted a batch
self._last_submitted_batch = None
# additional callbacks to be executed alongside self.cleanup()
Expand Down Expand Up @@ -671,6 +675,30 @@ def _event_precheck(self, event):
return True, "precheck succeeded"

async def _event_postcheck(self, event):
    """
    A simple wrapper for dup tracking and preserving event chains for graph modules.

    Runs the full post-check, then (for accepted events) applies incoming-duplicate
    suppression and, for graph-preserving modules, re-queues the event's source
    chain so the output graph stays connected.

    Returns:
        tuple: (acceptable (bool), reason (str))
    """
    acceptable, reason = await self.__event_postcheck(event)
    if acceptable:
        # reject duplicates unless the module accepts them or the event is needed
        # to keep the graph connected
        is_incoming_duplicate = self.is_incoming_duplicate(event, add=True)
        if is_incoming_duplicate and not self.accept_dupes:
            # bugfix: was self._graph_important(event), which doesn't exist;
            # the method is named _is_graph_important()
            if not self._is_graph_important(event):
                return False, f"module has already seen {event}"

        if self._preserve_graph:
            # walk the source chain and re-queue any ancestors this module hasn't
            # seen yet, so graph modules receive the full event trail
            s = event
            while 1:
                s = s.source
                if s is None or s == self.scan.root_event or s == event:
                    break
                if not self.is_incoming_duplicate(s, add=True):
                    # bugfix: previously added hash(event) (the already-accepted
                    # event) instead of the re-queued ancestor, so the ancestor
                    # never got its graph-important exemption on arrival
                    self._graph_important_tracker.add(hash(s))
                    # demoted from self.critical() -- leftover debug output; also
                    # log the event actually being queued (s, not event)
                    self.debug(f"queueing {s}")
                    await self.queue_event(s, precheck=False)

    return acceptable, reason

async def __event_postcheck(self, event):
"""
Post-checks an event to determine if it should be accepted by the module for handling.
Expand All @@ -683,14 +711,6 @@ async def _event_postcheck(self, event):
Returns:
tuple: A tuple (bool, str) where the bool indicates if the event should be accepted, and the str gives the reason.
Examples:
>>> async def custom_filter(event):
... if event.data not in ["evilcorp.com"]:
... return False, "it's not on the cool list"
...
>>> self.filter_event = custom_filter
>>> result, reason = await self._event_postcheck(event)
Notes:
- Override the `filter_event` method for custom filtering logic.
- This method also maintains host-based tracking when the `per_host_only` flag is set.
Expand All @@ -709,6 +729,8 @@ async def _event_postcheck(self, event):
# check scope distance
filter_result, reason = self._scope_distance_check(event)
if not filter_result:
if self._is_graph_important(event):
return True, f"{reason}, but exception was made because it is graph important"
return filter_result, reason

# custom filtering
Expand Down Expand Up @@ -765,7 +787,7 @@ async def _cleanup(self):
async with self.scan._acatch(context), self._task_counter.count(context):
await self.helpers.execute_sync_or_async(callback)

async def queue_event(self, event):
async def queue_event(self, event, precheck=True):
"""
Asynchronously queues an incoming event to the module's event queue for further processing.
Expand All @@ -788,7 +810,9 @@ async def queue_event(self, event):
if self.incoming_event_queue is False:
self.debug(f"Not in an acceptable state to queue incoming event")
return
acceptable, reason = self._event_precheck(event)
acceptable, reason = True, "no precheck was performed"
if precheck:
acceptable, reason = self._event_precheck(event)
if not acceptable:
if reason and reason != "its type is not in watched_events":
self.debug(f"Not accepting {event} because {reason}")
Expand Down Expand Up @@ -871,6 +895,30 @@ def set_error_state(self, message=None, clear_outgoing_queue=False):
while 1:
self.outgoing_event_queue.get_nowait()

def is_incoming_duplicate(self, event, add=False):
    """
    Check whether this module has already seen `event`, optionally recording it.

    Graph-important events are never reported as duplicates (though they are
    still recorded in the tracker when `add=True`).
    """
    dedup_hash = self._incoming_dedup_hash(event)
    already_seen = dedup_hash in self._incoming_dup_tracker
    if add:
        self._incoming_dup_tracker.add(dedup_hash)
    # events needed for graph continuity always pass the dup check
    if self._is_graph_important(event):
        return False
    return already_seen

def _is_graph_important(self, event):
return self._preserve_graph and hash(event) in self._graph_important_tracker

def _incoming_dedup_hash(self, event):
"""
Determines the criteria for what is considered to be a duplicate event if `accept_dupes` is False.
"""
return hash(event)

def _outgoing_dedup_hash(self, event):
"""
Determines the criteria for what is considered to be a duplicate event if `suppress_dupes` is True.
"""
return hash(event)

def get_per_host_hash(self, event):
"""
Computes a per-host hash value for a given event. This method may be optionally overridden in subclasses.
Expand Down
1 change: 1 addition & 0 deletions bbot/modules/httpx.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ async def handle_batch(self, *events):
if proxy:
command += ["-http-proxy", proxy]
async for line in self.helpers.run_live(command, input=list(stdin), stderr=subprocess.DEVNULL):
self.critical(line)
try:
j = json.loads(line)
except json.decoder.JSONDecodeError:
Expand Down
3 changes: 1 addition & 2 deletions bbot/modules/internal/speculate.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import random
import ipaddress

from bbot.core.helpers.misc import parse_port_string
from bbot.modules.internal.base import BaseInternalModule


Expand Down Expand Up @@ -43,7 +42,7 @@ async def setup(self):
port_string = self.config.get("ports", "80,443")

try:
self.ports = parse_port_string(port_string)
self.ports = self.helpers.parse_port_string(port_string)
except ValueError as e:
self.warning(f"Error parsing ports: {e}")
return False
Expand Down
Loading

0 comments on commit b8cfdd2

Please sign in to comment.