Skip to content

Commit

Permalink
Merge pull request #1295 from blacklanternsecurity/radixtarget
Browse files Browse the repository at this point in the history
Implement Radix Tree
  • Loading branch information
TheTechromancer authored Apr 26, 2024
2 parents 7baf219 + faf61ee commit 6feb345
Show file tree
Hide file tree
Showing 15 changed files with 212 additions and 156 deletions.
8 changes: 5 additions & 3 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
from datetime import datetime
from contextlib import suppress
from urllib.parse import urljoin
from radixtarget import RadixTarget
from pydantic import BaseModel, field_validator

from .helpers import *
from bbot.errors import *
from bbot.core.helpers import (
extract_words,
get_file_extension,
host_in_host,
is_domain,
is_subdomain,
is_ip,
Expand Down Expand Up @@ -93,7 +93,7 @@ class BaseEvent:
# Always emit this event type even if it's not in scope
_always_emit = False
# Always emit events with these tags even if they're not in scope
_always_emit_tags = ["affiliate"]
_always_emit_tags = ["affiliate", "target"]
# Bypass scope checking and dns resolution, distribute immediately to modules
# This is useful for "end-of-line" events like FINDING and VULNERABILITY
_quick_emit = False
Expand Down Expand Up @@ -580,7 +580,9 @@ def __contains__(self, other):
if self.host == other.host:
return True
# hostnames and IPs
return host_in_host(other.host, self.host)
radixtarget = RadixTarget()
radixtarget.insert(self.host)
return bool(radixtarget.search(other.host))
return False

def json(self, mode="json", siem_friendly=False):
Expand Down
18 changes: 9 additions & 9 deletions bbot/core/helpers/dns/dns.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import logging
import dns.exception
import dns.asyncresolver
from radixtarget import RadixTarget

from bbot.core.engine import EngineClient
from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name, host_in_host
from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name

from .engine import DNSEngine

Expand Down Expand Up @@ -63,10 +64,9 @@ def __init__(self, parent_helper):

# wildcard handling
self.wildcard_disable = self.config.get("dns_wildcard_disable", False)
self.wildcard_ignore = self.config.get("dns_wildcard_ignore", None)
if not self.wildcard_ignore:
self.wildcard_ignore = []
self.wildcard_ignore = tuple([str(d).strip().lower() for d in self.wildcard_ignore])
self.wildcard_ignore = RadixTarget()
for d in self.config.get("dns_wildcard_ignore", []):
self.wildcard_ignore.insert(d)

# copy the system's current resolvers to a text file for tool use
self.system_resolvers = dns.resolver.Resolver().nameservers
Expand Down Expand Up @@ -150,10 +150,10 @@ def _wildcard_prevalidation(self, host):
return False

# skip check if the query's parent domain is excluded in the config
for d in self.wildcard_ignore:
if host_in_host(host, d):
log.debug(f"Skipping wildcard detection on {host} because it is excluded in the config")
return False
wildcard_ignore = self.wildcard_ignore.search(host)
if wildcard_ignore:
log.debug(f"Skipping wildcard detection on {host} because {wildcard_ignore} is excluded in the config")
return False

return host

Expand Down
21 changes: 14 additions & 7 deletions bbot/core/helpers/dns/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,8 @@ def new_task(query, rdtype):
if queries: # Start a new task for each one completed, if URLs remain
new_task(*queries.pop(0))

def extract_targets(self, record):
@staticmethod
def extract_targets(record):
"""
Extracts hostnames or IP addresses from a given DNS record.
Expand All @@ -429,24 +430,30 @@ def extract_targets(self, record):
"""
results = set()

def add_result(rdtype, _record):
cleaned = clean_dns_record(_record)
if cleaned:
results.add((rdtype, cleaned))

rdtype = str(record.rdtype.name).upper()
if rdtype in ("A", "AAAA", "NS", "CNAME", "PTR"):
results.add((rdtype, clean_dns_record(record)))
add_result(rdtype, record)
elif rdtype == "SOA":
results.add((rdtype, clean_dns_record(record.mname)))
add_result(rdtype, record.mname)
elif rdtype == "MX":
results.add((rdtype, clean_dns_record(record.exchange)))
add_result(rdtype, record.exchange)
elif rdtype == "SRV":
results.add((rdtype, clean_dns_record(record.target)))
add_result(rdtype, record.target)
elif rdtype == "TXT":
for s in record.strings:
s = smart_decode(s)
for match in dns_name_regex.finditer(s):
start, end = match.span()
host = s[start:end]
results.add((rdtype, host))
add_result(rdtype, host)
elif rdtype == "NSEC":
results.add((rdtype, clean_dns_record(record.next)))
add_result(rdtype, record.next)
else:
log.warning(f'Unknown DNS record type "{rdtype}"')
return results
Expand Down
71 changes: 5 additions & 66 deletions bbot/core/helpers/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,7 @@ def is_ip_type(i):
>>> is_ip_type("192.168.1.0/24")
False
"""
return isinstance(i, ipaddress._BaseV4) or isinstance(i, ipaddress._BaseV6)
return ipaddress._IPAddressBase in i.__class__.__mro__


def make_ip_type(s):
Expand All @@ -663,78 +663,17 @@ def make_ip_type(s):
>>> make_ip_type("evilcorp.com")
'evilcorp.com'
"""
if not s:
raise ValueError(f'Invalid hostname: "{s}"')
# IP address
with suppress(Exception):
return ipaddress.ip_address(str(s).strip())
return ipaddress.ip_address(s)
# IP network
with suppress(Exception):
return ipaddress.ip_network(str(s).strip(), strict=False)
return ipaddress.ip_network(s, strict=False)
return s


def host_in_host(host1, host2):
"""
Checks if host1 is included within host2, either as a subdomain, IP, or IP network.
Used for scope calculations/decisions within BBOT.
Args:
host1 (str or ipaddress.IPv4Address or ipaddress.IPv6Address or ipaddress.IPv4Network or ipaddress.IPv6Network):
The host to check for inclusion within host2.
host2 (str or ipaddress.IPv4Address or ipaddress.IPv6Address or ipaddress.IPv4Network or ipaddress.IPv6Network):
The host within which to check for the inclusion of host1.
Returns:
bool: True if host1 is included in host2, otherwise False.
Examples:
>>> host_in_host("www.evilcorp.com", "evilcorp.com")
True
>>> host_in_host("evilcorp.com", "www.evilcorp.com")
False
>>> host_in_host(ipaddress.IPv6Address('dead::beef'), ipaddress.IPv6Network('dead::/64'))
True
>>> host_in_host(ipaddress.IPv4Address('192.168.1.1'), ipaddress.IPv4Network('10.0.0.0/8'))
False
Notes:
- If checking an IP address/network, you MUST FIRST convert your IP into an ipaddress object (e.g. via `make_ip_type()`) before passing it to this function.
"""

"""
Is host1 included in host2?
"www.evilcorp.com" in "evilcorp.com"? --> True
"evilcorp.com" in "www.evilcorp.com"? --> False
IPv6Address('dead::beef') in IPv6Network('dead::/64')? --> True
IPv4Address('192.168.1.1') in IPv4Network('10.0.0.0/8')? --> False
Very important! Used throughout BBOT for scope calculations/decisions.
Works with hostnames, IPs, and IP networks.
"""

if not host1 or not host2:
return False

# check if hosts are IP types
host1_ip_type = is_ip_type(host1)
host2_ip_type = is_ip_type(host2)
# if both hosts are IP types
if host1_ip_type and host2_ip_type:
if not host1.version == host2.version:
return False
host1_net = ipaddress.ip_network(host1)
host2_net = ipaddress.ip_network(host2)
return host1_net.subnet_of(host2_net)

# else hostnames
elif not (host1_ip_type or host2_ip_type):
host2_len = len(host2.split("."))
host1_truncated = ".".join(host1.split(".")[-host2_len:])
return host1_truncated == host2

return False


def sha1(data):
"""
Computes the SHA-1 hash of the given data.
Expand Down
6 changes: 3 additions & 3 deletions bbot/modules/internal/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ async def handle_event(self, event, kwargs):
hosts_to_check = set(str(s) for s in event.resolved_hosts)
hosts_to_check.add(str(event.host_original))
for host in hosts_to_check:
provider, provider_type, subnet = self.helpers.cloudcheck(host)
if provider:
event.add_tag(f"{provider_type}-{provider}")
for provider, provider_type, subnet in self.helpers.cloudcheck(host):
if provider:
event.add_tag(f"{provider_type}-{provider}")

found = set()
# look for cloud assets in hosts, http responses
Expand Down
1 change: 0 additions & 1 deletion bbot/scanner/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ async def init_events(self, events):
sorted_events = sorted(events, key=lambda e: len(e.data))
for event in [self.scan.root_event] + sorted_events:
event._dummy = False
event.scope_distance = 0
event.web_spider_distance = 0
event.scan = self.scan
if event.source is None:
Expand Down
72 changes: 37 additions & 35 deletions bbot/scanner/target.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import re
import copy
import logging
import ipaddress
from contextlib import suppress
from radixtarget import RadixTarget

from bbot.errors import *
from bbot.modules.base import BaseModule
from bbot.core.event import make_event, is_event
from bbot.core.helpers.misc import ip_network_parents, is_ip_type, domain_parents

log = logging.getLogger("bbot.core.target")

Expand All @@ -19,7 +20,8 @@ class Target:
strict_scope (bool): Flag indicating whether to consider child domains in-scope.
If set to True, only the exact hosts specified and not their children are considered part of the target.
_events (dict): Dictionary mapping hosts to events related to the target.
_radix (RadixTree): Radix tree for quick IP/DNS lookups.
_events (set): Flat set of contained events.
Examples:
Basic usage
Expand Down Expand Up @@ -85,8 +87,9 @@ def __init__(self, *targets, strict_scope=False):
"ORG_STUB": re.compile(r"^ORG:(.*)", re.IGNORECASE),
"ASN": re.compile(r"^ASN:(.*)", re.IGNORECASE),
}
self._events = set()
self._radix = RadixTarget()

self._events = dict()
if len(targets) > 0:
log.verbose(f"Creating events from {len(targets):,} targets")
for t in targets:
Expand Down Expand Up @@ -115,11 +118,8 @@ def add_target(self, t, event_type=None):
t = [t]
for single_target in t:
if type(single_target) == self.__class__:
for k, v in single_target._events.items():
try:
self._events[k].update(v)
except KeyError:
self._events[k] = set(single_target._events[k])
for event in single_target.events:
self._add_event(event)
else:
if is_event(single_target):
event = single_target
Expand All @@ -141,18 +141,12 @@ def add_target(self, t, event_type=None):
# allow commented lines
if not str(t).startswith("#"):
raise ValidationError(f'Could not add target "{t}": {e}')

try:
self._events[event.host].add(event)
except KeyError:
self._events[event.host] = {
event,
}
self._add_event(event)

@property
def events(self):
"""
A generator property that yields all events in the target.
Returns all events in the target.
Yields:
Event object: One of the Event objects stored in the `_events` dictionary.
Expand All @@ -164,14 +158,12 @@ def events(self):
Notes:
- This property is read-only.
- Iterating over this property gives you one event at a time from the `_events` dictionary.
"""
for _events in self._events.values():
yield from _events
return self._events

def copy(self):
"""
Creates and returns a copy of the Target object, including a shallow copy of the `_events` attribute.
Creates and returns a copy of the Target object, including a shallow copy of the `_events` and `_radix` attributes.
Returns:
Target: A new Target object with the same attributes as the original.
Expand All @@ -193,12 +185,13 @@ def copy(self):
- The `scan` object reference is kept intact in the copied Target object.
"""
self_copy = self.__class__()
self_copy._events = dict(self._events)
self_copy._events = set(self._events)
self_copy._radix = copy.copy(self._radix)
return self_copy

def get(self, host):
"""
Gets the event associated with the specified host from the target's `_events` dictionary.
Gets the event associated with the specified host from the target's radix tree.
Args:
host (Event, Target, or str): The hostname, IP, URL, or event to look for.
Expand All @@ -224,15 +217,24 @@ def get(self, host):
return
if other.host:
with suppress(KeyError, StopIteration):
return next(iter(self._events[other.host]))
if is_ip_type(other.host):
for n in ip_network_parents(other.host, include_self=True):
with suppress(KeyError, StopIteration):
return next(iter(self._events[n]))
elif not self.strict_scope:
for h in domain_parents(other.host):
with suppress(KeyError, StopIteration):
return next(iter(self._events[h]))
result = self._radix.search(other.host)
if result is not None:
for event in result:
# if the result is a dns name and strict scope is enabled
if isinstance(event.host, str) and self.strict_scope:
# if the result doesn't exactly equal the host, abort
if event.host != other.host:
return
return event

def _add_event(self, event):
radix_data = self._radix.search(event.host)
if radix_data is None:
radix_data = {event}
self._radix.insert(event.host, radix_data)
else:
radix_data.add(event)
self._events.add(event)

def _contains(self, other):
if self.get(other) is not None:
Expand Down Expand Up @@ -282,11 +284,11 @@ def __len__(self):
- For other types of hosts, each unique event is counted as one.
"""
num_hosts = 0
for host, _events in self._events.items():
if type(host) in (ipaddress.IPv4Network, ipaddress.IPv6Network):
num_hosts += host.num_addresses
for event in self._events:
if isinstance(event.host, (ipaddress.IPv4Network, ipaddress.IPv6Network)):
num_hosts += event.host.num_addresses
else:
num_hosts += len(_events)
num_hosts += 1
return num_hosts


Expand Down
Loading

0 comments on commit 6feb345

Please sign in to comment.