From eaf2cdf6f206521470682e0fc6c8087b067c5c72 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:23:18 -0400 Subject: [PATCH 01/10] implement radixtarget --- bbot/core/event/base.py | 6 ++- bbot/core/helpers/dns/dns.py | 20 ++++---- bbot/core/helpers/misc.py | 71 ++------------------------- bbot/scanner/target.py | 61 ++++++++++++----------- bbot/test/test_step_1/test_helpers.py | 10 ---- poetry.lock | 13 ++++- pyproject.toml | 1 + 7 files changed, 65 insertions(+), 117 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index c036b9618..8c69d829d 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -8,6 +8,7 @@ from datetime import datetime from contextlib import suppress from urllib.parse import urljoin +from radixtarget import RadixTarget from pydantic import BaseModel, field_validator from .helpers import * @@ -15,7 +16,6 @@ from bbot.core.helpers import ( extract_words, get_file_extension, - host_in_host, is_domain, is_subdomain, is_ip, @@ -580,7 +580,9 @@ def __contains__(self, other): if self.host == other.host: return True # hostnames and IPs - return host_in_host(other.host, self.host) + radixtarget = RadixTarget() + radixtarget.insert(self.host) + return bool(radixtarget.search(other.host)) return False def json(self, mode="json", siem_friendly=False): diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 9764687bf..2d78d2c19 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -2,9 +2,10 @@ import logging import dns.exception import dns.asyncresolver +from radixtarget import RadixTarget from bbot.core.engine import EngineClient -from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name, host_in_host +from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name from .engine import DNSEngine @@ -63,10 +64,9 @@ def __init__(self, parent_helper): # wildcard handling self.wildcard_disable = self.config.get("dns_wildcard_disable", False) - self.wildcard_ignore = self.config.get("dns_wildcard_ignore", None) - if not self.wildcard_ignore: - self.wildcard_ignore = [] - self.wildcard_ignore = tuple([str(d).strip().lower() for d in self.wildcard_ignore]) + self.wildcard_ignore = RadixTarget() + for d in self.config.get("dns_wildcard_ignore", []): + self.wildcard_ignore.insert(d) # copy the system's current resolvers to a text file for tool use self.system_resolvers = dns.resolver.Resolver().nameservers @@ -150,10 +150,12 @@ def _wildcard_prevalidation(self, host): return False # skip check if the query's parent domain is excluded in the config - for d in self.wildcard_ignore: - if host_in_host(host, d): - log.debug(f"Skipping wildcard detection on {host} because it is excluded in the config") - return False + wildcard_ignore = self.wildcard_ignore.search(host) + if wildcard_ignore: + log.debug( + f"Skipping wildcard detection on {host} because it or its parent domai ({wildcard_ignore}) is excluded in the config" + ) + return False return host diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index d6e3238d1..a4378069d 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -637,7 +637,7 @@ def is_ip_type(i): >>> is_ip_type("192.168.1.0/24") False """ - return isinstance(i, ipaddress._BaseV4) or isinstance(i, ipaddress._BaseV6) + return ipaddress._IPAddressBase in i.__class__.__mro__ def make_ip_type(s): @@ -663,78 +663,17 @@ def make_ip_type(s): >>> make_ip_type("evilcorp.com") 'evilcorp.com' """ + if not s: + raise ValueError(f'Invalid hostname: "{s}"') # IP address with suppress(Exception): - return ipaddress.ip_address(str(s).strip()) + return ipaddress.ip_address(s) # IP network with suppress(Exception): - return ipaddress.ip_network(str(s).strip(), strict=False) + return ipaddress.ip_network(s, strict=False) return s -def host_in_host(host1, host2): - """ - Checks if host1 is included within host2, either as a subdomain, IP, or IP network. - Used for scope calculations/decisions within BBOT. - - Args: - host1 (str or ipaddress.IPv4Address or ipaddress.IPv6Address or ipaddress.IPv4Network or ipaddress.IPv6Network): - The host to check for inclusion within host2. - host2 (str or ipaddress.IPv4Address or ipaddress.IPv6Address or ipaddress.IPv4Network or ipaddress.IPv6Network): - The host within which to check for the inclusion of host1. - - Returns: - bool: True if host1 is included in host2, otherwise False. - - Examples: - >>> host_in_host("www.evilcorp.com", "evilcorp.com") - True - >>> host_in_host("evilcorp.com", "www.evilcorp.com") - False - >>> host_in_host(ipaddress.IPv6Address('dead::beef'), ipaddress.IPv6Network('dead::/64')) - True - >>> host_in_host(ipaddress.IPv4Address('192.168.1.1'), ipaddress.IPv4Network('10.0.0.0/8')) - False - - Notes: - - If checking an IP address/network, you MUST FIRST convert your IP into an ipaddress object (e.g. via `make_ip_type()`) before passing it to this function. - """ - - """ - Is host1 included in host2? - "www.evilcorp.com" in "evilcorp.com"? --> True - "evilcorp.com" in "www.evilcorp.com"? --> False - IPv6Address('dead::beef') in IPv6Network('dead::/64')? --> True - IPv4Address('192.168.1.1') in IPv4Network('10.0.0.0/8')? --> False - - Very important! Used throughout BBOT for scope calculations/decisions. - - Works with hostnames, IPs, and IP networks. - """ - - if not host1 or not host2: - return False - - # check if hosts are IP types - host1_ip_type = is_ip_type(host1) - host2_ip_type = is_ip_type(host2) - # if both hosts are IP types - if host1_ip_type and host2_ip_type: - if not host1.version == host2.version: - return False - host1_net = ipaddress.ip_network(host1) - host2_net = ipaddress.ip_network(host2) - return host1_net.subnet_of(host2_net) - - # else hostnames - elif not (host1_ip_type or host2_ip_type): - host2_len = len(host2.split(".")) - host1_truncated = ".".join(host1.split(".")[-host2_len:]) - return host1_truncated == host2 - - return False - - def sha1(data): """ Computes the SHA-1 hash of the given data. diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index a0f8130c8..1016fd3cf 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -1,12 +1,13 @@ import re +import copy import logging import ipaddress from contextlib import suppress +from radixtarget import RadixTarget from bbot.errors import * from bbot.modules.base import BaseModule from bbot.core.event import make_event, is_event -from bbot.core.helpers.misc import ip_network_parents, is_ip_type, domain_parents log = logging.getLogger("bbot.core.target") @@ -19,7 +20,8 @@ class Target: strict_scope (bool): Flag indicating whether to consider child domains in-scope. If set to True, only the exact hosts specified and not their children are considered part of the target. - _events (dict): Dictionary mapping hosts to events related to the target. + _radix (RadixTree): Radix tree for quick IP/DNS lookups. + _events (set): Flat set of contained events. Examples: Basic usage @@ -85,8 +87,9 @@ def __init__(self, *targets, strict_scope=False): "ORG_STUB": re.compile(r"^ORG:(.*)", re.IGNORECASE), "ASN": re.compile(r"^ASN:(.*)", re.IGNORECASE), } + self._events = set() + self._radix = RadixTarget() - self._events = dict() if len(targets) > 0: log.verbose(f"Creating events from {len(targets):,} targets") for t in targets: @@ -142,17 +145,18 @@ def add_target(self, t, event_type=None): if not str(t).startswith("#"): raise ValidationError(f'Could not add target "{t}": {e}') - try: - self._events[event.host].add(event) - except KeyError: - self._events[event.host] = { - event, - } + radix_data = self._radix.search(event.host) + if radix_data is None: + radix_data = {event} + self._radix.insert(event.host, radix_data) + else: + radix_data.add(event) + self._events.add(event) @property def events(self): """ - A generator property that yields all events in the target. + Returns all events in the target. Yields: Event object: One of the Event objects stored in the `_events` dictionary. @@ -164,14 +168,12 @@ def events(self): Notes: - This property is read-only. - - Iterating over this property gives you one event at a time from the `_events` dictionary. """ - for _events in self._events.values(): - yield from _events + return self._events def copy(self): """ - Creates and returns a copy of the Target object, including a shallow copy of the `_events` attribute. + Creates and returns a copy of the Target object, including a shallow copy of the `_events` and `_radix` attributes. Returns: Target: A new Target object with the sameattributes as the original. @@ -193,12 +195,13 @@ def copy(self): - The `scan` object reference is kept intact in the copied Target object. """ self_copy = self.__class__() - self_copy._events = dict(self._events) + self_copy._events = set(self._events) + self_copy._radix = copy.copy(self._radix) return self_copy def get(self, host): """ - Gets the event associated with the specified host from the target's `_events` dictionary. + Gets the event associated with the specified host from the target's radix tree. Args: host (Event, Target, or str): The hostname, IP, URL, or event to look for. @@ -224,15 +227,15 @@ def get(self, host): return if other.host: with suppress(KeyError, StopIteration): - return next(iter(self._events[other.host])) - if is_ip_type(other.host): - for n in ip_network_parents(other.host, include_self=True): - with suppress(KeyError, StopIteration): - return next(iter(self._events[n])) - elif not self.strict_scope: - for h in domain_parents(other.host): - with suppress(KeyError, StopIteration): - return next(iter(self._events[h])) + result = self._radix.search(other.host) + if result is not None: + for event in result: + # if the result is a dns name and strict scope is enabled + if isinstance(result, str) and self.strict_scope: + # if the result doesn't exactly equal the host, abort + if event.host != other.host: + return + return event def _contains(self, other): if self.get(other) is not None: @@ -282,11 +285,11 @@ def __len__(self): - For other types of hosts, each unique event is counted as one. """ num_hosts = 0 - for host, _events in self._events.items(): - if type(host) in (ipaddress.IPv4Network, ipaddress.IPv6Network): - num_hosts += host.num_addresses + for event in self._events: + if isinstance(event.host, (ipaddress.IPv4Network, ipaddress.IPv6Network)): + num_hosts += event.host.num_addresses else: - num_hosts += len(_events) + num_hosts += 1 return num_hosts diff --git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py index 0ce3e0c76..4e3f3993e 100644 --- a/bbot/test/test_step_1/test_helpers.py +++ b/bbot/test/test_step_1/test_helpers.py @@ -103,16 +103,6 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_httpserver): assert helpers.domain_stem("evilcorp.co.uk") == "evilcorp" assert helpers.domain_stem("www.evilcorp.co.uk") == "www.evilcorp" - assert helpers.host_in_host("www.evilcorp.com", "evilcorp.com") == True - assert helpers.host_in_host("asdf.www.evilcorp.com", "evilcorp.com") == True - assert helpers.host_in_host("evilcorp.com", "www.evilcorp.com") == False - assert helpers.host_in_host("evilcorp.com", "evilcorp.com") == True - assert helpers.host_in_host("evilcorp.com", "eevilcorp.com") == False - assert helpers.host_in_host("eevilcorp.com", "evilcorp.com") == False - assert helpers.host_in_host("evilcorp.com", "evilcorp") == False - assert helpers.host_in_host("evilcorp", "evilcorp.com") == False - assert helpers.host_in_host("evilcorp.com", "com") == True - assert tuple(await helpers.re.extract_emails("asdf@asdf.com\nT@t.Com&a=a@a.com__ b@b.com")) == ( "asdf@asdf.com", "t@t.com", diff --git a/poetry.lock b/poetry.lock index 05386f0a4..034b4fef6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -2084,6 +2084,17 @@ files = [ [package.dependencies] cffi = {version = "*", markers = "implementation_name == \"pypy\""} +[[package]] +name = "radixtarget" +version = "1.0.0.15" +description = "Check whether an IP address belongs to a cloud provider" +optional = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "radixtarget-1.0.0.15-py3-none-any.whl", hash = "sha256:4e3f0620bfbc0ef2ff3d71270dd281c0e8428906d260f737f82b573a7b636dd8"}, + {file = "radixtarget-1.0.0.15.tar.gz", hash = "sha256:c8294ebbb76e6d2826deaa8fe18d568308eddfd25f20644e166c492d2626a70c"}, +] + [[package]] name = "regex" version = "2024.4.16" @@ -2625,4 +2636,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "15633b02fcedb3d044f4e40a45ce1e9dd7209608a0389175a4523e3810a8504b" +content-hash = "100618fdac0971d8b3662f2bfe72a8fae4f221ca78dfc6a0edf605859ab64f3f" diff --git a/pyproject.toml b/pyproject.toml index 0cc6eed31..1c0c15a9c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,6 +51,7 @@ pyzmq = "^25.1.2" regex = "^2024.4.16" unidecode = "^1.3.8" cloudcheck = "^4.0.0.345" +radixtarget = "^1.0.0.15" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From af110c9a6bf4fec254471e63674d11beee1a30c9 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:35:32 -0400 Subject: [PATCH 02/10] better scope tests --- bbot/scanner/target.py | 2 +- bbot/test/test_step_1/test_target.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index 1016fd3cf..7059bda70 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -231,7 +231,7 @@ def get(self, host): if result is not None: for event in result: # if the result is a dns name and strict scope is enabled - if isinstance(result, str) and self.strict_scope: + if isinstance(event.host, str) and self.strict_scope: # if the result doesn't exactly equal the host, abort if event.host != other.host: return diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index dced8af02..521593191 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -38,3 +38,13 @@ def test_target(bbot_scanner): assert scan1.target.get("2001:4860:4860::888c") is None assert str(scan1.target.get("www.api.publicapis.org").host) == "api.publicapis.org" assert scan1.target.get("publicapis.org") is None + + from bbot.scanner.target import Target + target = Target("evilcorp.com") + assert not "com" in target + assert "evilcorp.com" in target + assert "www.evilcorp.com" in target + strict_target = Target("evilcorp.com", strict_scope=True) + assert not "com" in strict_target + assert "evilcorp.com" in strict_target + assert not "www.evilcorp.com" in strict_target From 8f72db74e982778b491d5a4bb2fef94fe7779a80 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:35:46 -0400 Subject: [PATCH 03/10] blacked --- bbot/test/test_step_1/test_target.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index 521593191..cf210c0f6 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -40,6 +40,7 @@ def test_target(bbot_scanner): assert scan1.target.get("publicapis.org") is None from bbot.scanner.target import Target + target = Target("evilcorp.com") assert not "com" in target assert "evilcorp.com" in target From 4f073125ca89d1f523667f15b127d008148a5940 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:42:23 -0400 Subject: [PATCH 04/10] update cloudcheck --- poetry.lock | 9 +++++---- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 034b4fef6..be6fea410 100644 --- a/poetry.lock +++ b/poetry.lock @@ -388,18 +388,19 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] name = "cloudcheck" -version = "4.0.0.345" +version = "5.0.0.350" description = "Check whether an IP address belongs to a cloud provider" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "cloudcheck-4.0.0.345-py3-none-any.whl", hash = "sha256:82a1cecaa0ec35a50d6c1e4884a9535eb4c1c788b845b0c4a91b44935f4dc765"}, - {file = "cloudcheck-4.0.0.345.tar.gz", hash = "sha256:787953a305c0be6e6eb4ceb9990dccb633f9e1429d5ebfda7acf7dca35b3caeb"}, + {file = "cloudcheck-5.0.0.350-py3-none-any.whl", hash = "sha256:6f2ed981818bde6d8b6c5a6413a843e11d0aa1a4bf8b36452dcae1030a537dd6"}, + {file = "cloudcheck-5.0.0.350.tar.gz", hash = "sha256:cb59dfef966268ebc176e242634b84a3423a84ffaf4fac40566f37edfaddc106"}, ] [package.dependencies] httpx = ">=0.26,<0.28" pydantic = ">=2.4.2,<3.0.0" +radixtarget = ">=1.0.0.14,<2.0.0.0" regex = ">=2024.4.16,<2025.0.0" [[package]] @@ -2636,4 +2637,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "100618fdac0971d8b3662f2bfe72a8fae4f221ca78dfc6a0edf605859ab64f3f" +content-hash = "ed8bb07e4ff5a5f665402db33f9016409547bef1ccb6a8c2c626c44fde075abb" diff --git a/pyproject.toml b/pyproject.toml index 1c0c15a9c..7ba00c488 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,8 +50,8 @@ jinja2 = "^3.1.3" pyzmq = "^25.1.2" regex = "^2024.4.16" unidecode = "^1.3.8" -cloudcheck = "^4.0.0.345" radixtarget = "^1.0.0.15" +cloudcheck = "^5.0.0.350" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From 53f71e9af883396da0cb22712f306c5b3129f12c Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:43:09 -0400 Subject: [PATCH 05/10] fix cloudcheck --- bbot/modules/internal/cloud.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bbot/modules/internal/cloud.py b/bbot/modules/internal/cloud.py index e6bab4baa..29abef4d2 100644 --- a/bbot/modules/internal/cloud.py +++ b/bbot/modules/internal/cloud.py @@ -24,9 +24,9 @@ async def handle_event(self, event, kwargs): hosts_to_check = set(str(s) for s in event.resolved_hosts) hosts_to_check.add(str(event.host_original)) for host in hosts_to_check: - provider, provider_type, subnet = self.helpers.cloudcheck(host) - if provider: - event.add_tag(f"{provider_type}-{provider}") + for provider, provider_type, subnet in self.helpers.cloudcheck(host) + if provider: + event.add_tag(f"{provider_type}-{provider}") found = set() # look for cloud assets in hosts, http responses From a3c8e61da81c42decfc6de3659928d0cc6ddba9f Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 17:48:53 -0400 Subject: [PATCH 06/10] better target tests --- bbot/modules/internal/cloud.py | 2 +- bbot/scanner/target.py | 25 ++++++++++++------------- bbot/test/test_step_1/test_target.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/bbot/modules/internal/cloud.py b/bbot/modules/internal/cloud.py index 29abef4d2..7939487fd 100644 --- a/bbot/modules/internal/cloud.py +++ b/bbot/modules/internal/cloud.py @@ -24,7 +24,7 @@ async def handle_event(self, event, kwargs): hosts_to_check = set(str(s) for s in event.resolved_hosts) hosts_to_check.add(str(event.host_original)) for host in hosts_to_check: - for provider, provider_type, subnet in self.helpers.cloudcheck(host) + for provider, provider_type, subnet in self.helpers.cloudcheck(host): if provider: event.add_tag(f"{provider_type}-{provider}") diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index 7059bda70..b19d1b6a6 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -118,11 +118,8 @@ def add_target(self, t, event_type=None): t = [t] for single_target in t: if type(single_target) == self.__class__: - for k, v in single_target._events.items(): - try: - self._events[k].update(v) - except KeyError: - self._events[k] = set(single_target._events[k]) + for event in single_target.events: + self._add_event(event) else: if is_event(single_target): event = single_target @@ -144,14 +141,7 @@ def add_target(self, t, event_type=None): # allow commented lines if not str(t).startswith("#"): raise ValidationError(f'Could not add target "{t}": {e}') - - radix_data = self._radix.search(event.host) - if radix_data is None: - radix_data = {event} - self._radix.insert(event.host, radix_data) - else: - radix_data.add(event) - self._events.add(event) + self._add_event(event) @property def events(self): @@ -237,6 +227,15 @@ def get(self, host): return return event + def _add_event(self, event): + radix_data = self._radix.search(event.host) + if radix_data is None: + radix_data = {event} + self._radix.insert(event.host, radix_data) + else: + radix_data.add(event) + self._events.add(event) + def _contains(self, other): if self.get(other) is not None: return True diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index cf210c0f6..ed5c1b7ef 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -49,3 +49,14 @@ def test_target(bbot_scanner): assert not "com" in strict_target assert "evilcorp.com" in strict_target assert not "www.evilcorp.com" in strict_target + + target = Target() + target.add_target("evilcorp.com") + assert not "com" in target + assert "evilcorp.com" in target + assert "www.evilcorp.com" in target + strict_target = Target(strict_scope=True) + strict_target.add_target("evilcorp.com") + assert not "com" in strict_target + assert "evilcorp.com" in strict_target + assert not "www.evilcorp.com" in strict_target From d42c189bf4eef282cd6bcb3d7c8b41143455bdab Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 18:00:46 -0400 Subject: [PATCH 07/10] fix typo --- bbot/core/helpers/dns/dns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 2d78d2c19..5b5365f28 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -153,7 +153,7 @@ def _wildcard_prevalidation(self, host): wildcard_ignore = self.wildcard_ignore.search(host) if wildcard_ignore: log.debug( - f"Skipping wildcard detection on {host} because it or its parent domai ({wildcard_ignore}) is excluded in the config" + f"Skipping wildcard detection on {host} because {wildcard_ignore} is excluded in the config" ) return False From 8c07684ca7364966cf5b688e374ce6e2e4134415 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 18:18:28 -0400 Subject: [PATCH 08/10] better dns name sanitization --- bbot/core/helpers/dns/dns.py | 4 +--- bbot/core/helpers/dns/engine.py | 21 ++++++++++++++------- bbot/test/test_step_1/test_dns.py | 11 +++++++++++ 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 5b5365f28..7f775483c 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -152,9 +152,7 @@ def _wildcard_prevalidation(self, host): # skip check if the query's parent domain is excluded in the config wildcard_ignore = self.wildcard_ignore.search(host) if wildcard_ignore: - log.debug( - f"Skipping wildcard detection on {host} because {wildcard_ignore} is excluded in the config" - ) + log.debug(f"Skipping wildcard detection on {host} because {wildcard_ignore} is excluded in the config") return False return host diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index b8e184264..6018e0e3f 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -403,7 +403,8 @@ def new_task(query, rdtype): if queries: # Start a new task for each one completed, if URLs remain new_task(*queries.pop(0)) - def extract_targets(self, record): + @staticmethod + def extract_targets(record): """ Extracts hostnames or IP addresses from a given DNS record. @@ -429,24 +430,30 @@ def extract_targets(self, record): """ results = set() + + def add_result(rdtype, _record): + cleaned = clean_dns_record(_record) + if cleaned: + results.add((rdtype, cleaned)) + rdtype = str(record.rdtype.name).upper() if rdtype in ("A", "AAAA", "NS", "CNAME", "PTR"): - results.add((rdtype, clean_dns_record(record))) + add_result(rdtype, record) elif rdtype == "SOA": - results.add((rdtype, clean_dns_record(record.mname))) + add_result(rdtype, record.mname) elif rdtype == "MX": - results.add((rdtype, clean_dns_record(record.exchange))) + add_result(rdtype, record.exchange) elif rdtype == "SRV": - results.add((rdtype, clean_dns_record(record.target))) + add_result(rdtype, record.target) elif rdtype == "TXT": for s in record.strings: s = smart_decode(s) for match in dns_name_regex.finditer(s): start, end = match.span() host = s[start:end] - results.add((rdtype, host)) + add_result(rdtype, host) elif rdtype == "NSEC": - results.add((rdtype, clean_dns_record(record.next))) + add_result(rdtype, record.next) else: log.warning(f'Unknown DNS record type "{rdtype}"') return results diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index afc5c1967..05796e464 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -29,6 +29,17 @@ async def test_dns_engine(bbot_scanner): pass_2 = True assert pass_1 and pass_2 + from bbot.core.helpers.dns.engine import DNSEngine + from bbot.core.helpers.dns.mock import MockResolver + + # ensure dns records are being properly cleaned + mockresolver = MockResolver({"evilcorp.com": {"MX": ["0 ."]}}) + mx_records = await mockresolver.resolve("evilcorp.com", rdtype="MX") + results = set() + for r in mx_records: + results.update(DNSEngine.extract_targets(r)) + assert not results + @pytest.mark.asyncio async def test_dns_resolution(bbot_scanner): From e4fd60af06d6e9570105f7fdf49bd6c4d7d46661 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 22 Apr 2024 19:06:51 -0400 Subject: [PATCH 09/10] fix ffuf tests --- .../test_step_2/module_tests/test_module_ffuf_shortnames.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py b/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py index cbbec11ea..1f624a410 100644 --- a/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py +++ b/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py @@ -143,7 +143,7 @@ async def setup_after_prep(self, module_test): tags=["shortname-file"], ) ) - module_test.scan.target._events["http://127.0.0.1:8888"] = seed_events + module_test.scan.target._events = set(seed_events) expect_args = {"method": "GET", "uri": "/administrator.aspx"} respond_args = {"response_data": "alive"} From faf61eecd47c6ef88aa9b317ac80a8700b064439 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 23 Apr 2024 11:25:37 -0400 Subject: [PATCH 10/10] small scope tweak --- bbot/core/event/base.py | 2 +- bbot/scanner/manager.py | 1 - .../test_manager_scope_accuracy.py | 28 +++---- bbot/test/test_step_1/test_scope.py | 75 ++++++++++++++++++- 4 files changed, 88 insertions(+), 18 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 8c69d829d..d7eabd6db 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -93,7 +93,7 @@ class BaseEvent: # Always emit this event type even if it's not in scope _always_emit = False # Always emit events with these tags even if they're not in scope - _always_emit_tags = ["affiliate"] + _always_emit_tags = ["affiliate", "target"] # Bypass scope checking and dns resolution, distribute immediately to modules # This is useful for "end-of-line" events like FINDING and VULNERABILITY _quick_emit = False diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index 76d7b6028..6fa59cf3e 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -43,7 +43,6 @@ async def init_events(self, events): sorted_events = sorted(events, key=lambda e: len(e.data)) for event in [self.scan.root_event] + sorted_events: event._dummy = False - event.scope_distance = 0 event.web_spider_distance = 0 event.scan = self.scan if event.source is None: diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index bc79a0029..dbca45276 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -750,12 +750,12 @@ def custom_setup(scan): "127.0.0.0/31", modules=["sslcert"], whitelist=["127.0.1.0"], - _config={"dns_resolution": False, "scope_report_distance": 0, "speculate": True, "modules": {"speculate": {"ports": "9999"}}}, + _config={"dns_resolution": False, "scope_report_distance": 0, "scope_search_distance": 1, "speculate": True, "modules": {"speculate": {"ports": "9999"}}}, _dns_mock={"www.bbottest.notreal": {"A": ["127.0.0.1"]}, "test.notreal": {"A": ["127.0.1.0"]}}, ) assert len(events) == 3 - assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 1]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999"]) @@ -765,30 +765,30 @@ def custom_setup(scan): assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) assert len(all_events) == 11 - assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 1]) - assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal == True and e.scope_distance == 1]) + assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 2]) + assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 2]) + assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal == True and e.scope_distance == 2]) assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "sslcert"]) + assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 3 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) assert len(all_events_nodups) == 9 - assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal == True and e.scope_distance == 1]) + assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 2]) + assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 2]) + assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "sslcert"]) + assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 3 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 5 - assert 1 == len([e for e in graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 1]) assert 0 == len([e for e in graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) - assert 1 == len([e for e in graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 1]) + assert 1 == len([e for e in graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 2]) assert 0 == len([e for e in graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.0:9999"]) assert 1 == len([e for e in graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.1:9999" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) diff --git a/bbot/test/test_step_1/test_scope.py b/bbot/test/test_step_1/test_scope.py index e51fec973..7435b82af 100644 --- a/bbot/test/test_step_1/test_scope.py +++ b/bbot/test/test_step_1/test_scope.py @@ -2,10 +2,58 @@ from ..test_step_2.module_tests.base import ModuleTestBase -class Scope_test_blacklist(ModuleTestBase): +class TestScopeBaseline(ModuleTestBase): targets = ["http://127.0.0.1:8888"] modules_overrides = ["httpx"] + async def setup_after_prep(self, module_test): + expect_args = {"method": "GET", "uri": "/"} + respond_args = {"response_data": "alive"} + module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) + + def check(self, module_test, events): + assert len(events) == 6 + assert 1 == len( + [ + e + for e in events + if e.type == "URL_UNVERIFIED" + and str(e.host) == "127.0.0.1" + and e.scope_distance == 0 + and "target" in e.tags + ] + ) + # we have two of these because the host module considers "always_emit" in its outgoing deduplication + assert 2 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" + and e.data == "127.0.0.1" + and e.scope_distance == 0 + and str(e.module) == "host" + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "HTTP_RESPONSE" + and str(e.host) == "127.0.0.1" + and e.port == 8888 + and e.scope_distance == 0 + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "URL" and str(e.host) == "127.0.0.1" and e.port == 8888 and e.scope_distance == 0 + ] + ) + + +class TestScopeBlacklist(TestScopeBaseline): blacklist = ["127.0.0.1"] async def setup_after_prep(self, module_test): @@ -14,9 +62,32 @@ async def setup_after_prep(self, module_test): module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) def check(self, module_test, events): + assert len(events) == 1 assert not any(e.type == "URL" for e in events) + assert not any(str(e.host) == "127.0.0.1" for e in events) -class Scope_test_whitelist(Scope_test_blacklist): +class TestScopeWhitelist(TestScopeBlacklist): blacklist = [] whitelist = ["255.255.255.255"] + + def check(self, module_test, events): + assert len(events) == 3 + assert not any(e.type == "URL" for e in events) + assert 1 == len( + [ + e + for e in events + if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.scope_distance == 1 and "target" in e.tags + ] + ) + assert 1 == len( + [ + e + for e in events + if e.type == "URL_UNVERIFIED" + and str(e.host) == "127.0.0.1" + and e.scope_distance == 1 + and "target" in e.tags + ] + )