From d8a6418773c2d77ad81235293adde7e5c5665104 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 19:24:43 -0500 Subject: [PATCH 01/41] don't create asyncio tasks for dns stuff --- bbot/core/helpers/dns.py | 34 +++++++++------------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 9ad22116f1..cc3cded6c7 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -627,24 +627,13 @@ def event_cache_get(self, host): except KeyError: return set(), None, None, set() - async def _resolve_batch_coro_wrapper(self, q, **kwargs): - """ - Helps us correlate task results back to their original arguments - """ - result = await self.resolve(q, **kwargs) - return (q, result) - async def resolve_batch(self, queries, **kwargs): """ - Asynchronously resolves a batch of queries in parallel and yields the results as they are completed. - - This method wraps around `_resolve_batch_coro_wrapper` to resolve a list of queries in parallel. - It batches the queries to a manageable size and executes them asynchronously, respecting - global rate limits. + A helper to execute a bunch of DNS requests. Args: queries (list): List of queries to resolve. - **kwargs: Additional keyword arguments to pass to `_resolve_batch_coro_wrapper`. + **kwargs: Additional keyword arguments to pass to `resolve()`. Yields: tuple: A tuple containing the original query and its resolved value. @@ -658,13 +647,8 @@ async def resolve_batch(self, queries, **kwargs): ('evilcorp.com', {'2.2.2.2'}) """ - queries = list(queries) - batch_size = 250 - for i in range(0, len(queries), batch_size): - batch = queries[i : i + batch_size] - tasks = [asyncio.create_task(self._resolve_batch_coro_wrapper(q, **kwargs)) for q in batch] - async for task in as_completed(tasks): - yield await task + for q in queries: + (q, await self.resolve(q, **kwargs)) def extract_targets(self, record): """ @@ -837,9 +821,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None): # if the caller hasn't already done the work of resolving the IPs if ips is None: # then resolve the query for all rdtypes - base_query_tasks = { - t: asyncio.create_task(self.resolve_raw(query, type=t, use_cache=True)) for t in rdtypes_to_check - } + base_query_tasks = {t: self.resolve_raw(query, type=t, use_cache=True) for t in rdtypes_to_check} for _rdtype, task in base_query_tasks.items(): raw_results, errors = await task if errors and not raw_results: @@ -949,6 +931,8 @@ async def is_wildcard_domain(self, domain, log_info=False): wildcard_domain_results[host] = self._wildcard_cache[host_hash] continue + log.verbose(f"Checking if {host} is a wildcard") + # determine if this is a wildcard domain wildcard_tasks = {t: [] for t in rdtypes_to_check} # resolve a bunch of random subdomains of the same parent @@ -958,14 +942,14 @@ async def is_wildcard_domain(self, domain, log_info=False): # continue for _ in range(self.wildcard_tests): rand_query = f"{rand_string(digits=False, length=10)}.{host}" - wildcard_task = asyncio.create_task(self.resolve(rand_query, type=rdtype, use_cache=False)) + wildcard_task = self.resolve(rand_query, type=rdtype, use_cache=False) wildcard_tasks[rdtype].append(wildcard_task) # combine the random results is_wildcard = False wildcard_results = dict() for rdtype, tasks in wildcard_tasks.items(): - async for task in as_completed(tasks): + for task in tasks: results = await task if results: is_wildcard = True From ce75405e022e9b8b5f7f24557e65858ec3f12cb8 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 19:27:47 -0500 Subject: [PATCH 02/41] fixed resolve_batch bug --- bbot/core/helpers/dns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index cc3cded6c7..f1c38cad3f 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -648,7 +648,7 @@ async def resolve_batch(self, queries, **kwargs): """ for q in queries: - (q, await self.resolve(q, **kwargs)) + yield (q, await self.resolve(q, **kwargs)) def extract_targets(self, record): """ From 60e095f2eff0bade531199941f9205a04d15d363 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 19:32:51 -0500 Subject: [PATCH 03/41] multiprocessize collapse_url --- bbot/modules/bevigil.py | 2 +- bbot/modules/wayback.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/modules/bevigil.py b/bbot/modules/bevigil.py index ff868e969b..435ceae08f 100644 --- a/bbot/modules/bevigil.py +++ b/bbot/modules/bevigil.py @@ -34,7 +34,7 @@ async def handle_event(self, event): if self.urls: urls = await self.query(query, request_fn=self.request_urls, parse_fn=self.parse_urls) if urls: - for parsed_url in await self.scan.run_in_executor(self.helpers.validators.collapse_urls, urls): + for parsed_url in await self.scan.run_in_executor_mp(self.helpers.validators.collapse_urls, urls): await self.emit_event(parsed_url.geturl(), "URL_UNVERIFIED", source=event) async def request_subdomains(self, query): diff --git a/bbot/modules/wayback.py b/bbot/modules/wayback.py index bf4fb769e6..92dc78db5c 100644 --- a/bbot/modules/wayback.py +++ b/bbot/modules/wayback.py @@ -56,7 +56,7 @@ async def query(self, query): dns_names = set() collapsed_urls = 0 start_time = datetime.now() - parsed_urls = await self.scan.run_in_executor( + parsed_urls = await self.scan.run_in_executor_mp( self.helpers.validators.collapse_urls, urls, threshold=self.garbage_threshold, From b6babc78024fde6d0117ad1117a9b0bd223108cd Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 19:41:38 -0500 Subject: [PATCH 04/41] logging for wildcards --- bbot/core/helpers/dns.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index f1c38cad3f..4d9f4e4978 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -969,6 +969,8 @@ async def is_wildcard_domain(self, domain, log_info=False): if log_info: log_fn = log.info log_fn(f"Encountered domain with wildcard DNS ({wildcard_rdtypes_str}): {host}") + else: + log.verbose(f"Finished checking {host}, it is not a wildcard") return wildcard_domain_results From a06081527d524c180b69617807b97643d65764ed Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 19:57:29 -0500 Subject: [PATCH 05/41] make sure things are awaited --- bbot/core/helpers/dns.py | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 4d9f4e4978..40b7ec1e11 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -526,9 +526,8 @@ async def resolve_event(self, event, minimal=False): types = ("A", "AAAA") if types: - tasks = [self.resolve_raw(event_host, type=t, use_cache=True) for t in types] - async for task in as_completed(tasks): - resolved_raw, errors = await task + for t in types: + resolved_raw, errors = await self.resolve_raw(event_host, type=t, use_cache=True) for rdtype, e in errors: if rdtype not in resolved_raw: event_tags.add(f"{rdtype.lower()}-error") @@ -821,9 +820,8 @@ async def is_wildcard(self, query, ips=None, rdtype=None): # if the caller hasn't already done the work of resolving the IPs if ips is None: # then resolve the query for all rdtypes - base_query_tasks = {t: self.resolve_raw(query, type=t, use_cache=True) for t in rdtypes_to_check} - for _rdtype, task in base_query_tasks.items(): - raw_results, errors = await task + for t in rdtypes_to_check: + raw_results, errors = await self.resolve_raw(query, type=t, use_cache=True) if errors and not raw_results: self.debug(f"Failed to resolve {query} ({_rdtype}) during wildcard detection") result[_rdtype] = (None, parent) @@ -934,23 +932,17 @@ async def is_wildcard_domain(self, domain, log_info=False): log.verbose(f"Checking if {host} is a wildcard") # determine if this is a wildcard domain - wildcard_tasks = {t: [] for t in rdtypes_to_check} + # resolve a bunch of random subdomains of the same parent + is_wildcard = False + wildcard_results = dict() for rdtype in rdtypes_to_check: # continue if a wildcard was already found for this rdtype # if rdtype in self._wildcard_cache[host_hash]: # continue for _ in range(self.wildcard_tests): rand_query = f"{rand_string(digits=False, length=10)}.{host}" - wildcard_task = self.resolve(rand_query, type=rdtype, use_cache=False) - wildcard_tasks[rdtype].append(wildcard_task) - - # combine the random results - is_wildcard = False - wildcard_results = dict() - for rdtype, tasks in wildcard_tasks.items(): - for task in tasks: - results = await task + results = await self.resolve(rand_query, type=rdtype, use_cache=False) if results: is_wildcard = True if not rdtype in wildcard_results: From 0e6254eb00337c7a377ab492ce473db9b1fb5af4 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 20:02:22 -0500 Subject: [PATCH 06/41] fix bug in dns.py --- bbot/core/helpers/dns.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 40b7ec1e11..97696b96bd 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -823,8 +823,8 @@ async def is_wildcard(self, query, ips=None, rdtype=None): for t in rdtypes_to_check: raw_results, errors = await self.resolve_raw(query, type=t, use_cache=True) if errors and not raw_results: - self.debug(f"Failed to resolve {query} ({_rdtype}) during wildcard detection") - result[_rdtype] = (None, parent) + self.debug(f"Failed to resolve {query} ({t}) during wildcard detection") + result[t] = (None, parent) continue for __rdtype, answers in raw_results: base_query_results = set() From bf539b33728c6d5f0ffb93c9b6fe028c33d214e1 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 20:59:27 -0500 Subject: [PATCH 07/41] flaked --- bbot/core/helpers/dns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 97696b96bd..0c1c17c01e 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -13,7 +13,7 @@ from bbot.core.helpers.ratelimiter import RateLimiter from bbot.core.helpers.async_helpers import NamedLock from bbot.core.errors import ValidationError, DNSError, DNSWildcardBreak -from .misc import is_ip, is_domain, is_dns_name, domain_parents, parent_domain, rand_string, cloudcheck, as_completed +from .misc import is_ip, is_domain, is_dns_name, domain_parents, parent_domain, rand_string, cloudcheck log = logging.getLogger("bbot.core.helpers.dns") From 19fe685bad40452b444b1e339daae9ebaaff1dfd Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 21:16:38 -0500 Subject: [PATCH 08/41] fix rare dns bug, verbosify abort_if --- bbot/core/helpers/dns.py | 2 -- bbot/scanner/manager.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 0c1c17c01e..74dc43a4af 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -950,8 +950,6 @@ async def is_wildcard_domain(self, domain, log_info=False): wildcard_results[rdtype].update(results) # we know this rdtype is a wildcard # so we don't need to check it anymore - with suppress(KeyError): - rdtypes_to_check.remove(rdtype) self._wildcard_cache.update({host_hash: wildcard_results}) wildcard_domain_results.update({host: wildcard_results}) diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index c0f5982304..4c08ff8477 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -260,7 +260,7 @@ async def _emit_event(self, event, **kwargs): abort_result, reason = abort_result msg += f": {reason}" if abort_result: - log.debug(msg) + log.verbose(msg) return # run success callback before distributing event (so it can add tags, etc.) From 126e39b21b89bf31333b2328d05f3ed6a7d2901c Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 21:35:38 -0500 Subject: [PATCH 09/41] limit anubisdb due to excessive garbage results --- bbot/modules/anubisdb.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bbot/modules/anubisdb.py b/bbot/modules/anubisdb.py index 7b0cda171a..2991222953 100644 --- a/bbot/modules/anubisdb.py +++ b/bbot/modules/anubisdb.py @@ -6,6 +6,8 @@ class anubisdb(subdomain_enum): watched_events = ["DNS_NAME"] produced_events = ["DNS_NAME"] meta = {"description": "Query jldc.me's database for subdomains"} + options = {"limit": 1000} + options_desc = {"limit": "Limit the number of subdomains returned per query (increasing this may slow the scan due to garbage results from this API)"} base_url = "https://jldc.me/anubis/subdomains" dns_abort_depth = 5 @@ -38,4 +40,4 @@ def parse_results(self, r, query): hostname = str(hostname).lower() if hostname.endswith(f".{query}") and not self.abort_if_pre(hostname): results.add(hostname) - return results + return sorted(results)[:self.config.get("limit", 1000)] From 99042a8c39a8e90631cb54533c3cf2101c778fe8 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 21:41:01 -0500 Subject: [PATCH 10/41] clean up code --- bbot/modules/anubisdb.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bbot/modules/anubisdb.py b/bbot/modules/anubisdb.py index 2991222953..ceaed2bbba 100644 --- a/bbot/modules/anubisdb.py +++ b/bbot/modules/anubisdb.py @@ -38,6 +38,9 @@ def parse_results(self, r, query): if json: for hostname in json: hostname = str(hostname).lower() - if hostname.endswith(f".{query}") and not self.abort_if_pre(hostname): + in_scope = hostname.endswith(f".{query}") + is_ptr = self.helpers.is_ptr(hostname) + too_long = self.abort_if_pre(hostname) + if in_scope and not is_ptr and not too_long: results.add(hostname) return sorted(results)[:self.config.get("limit", 1000)] From 9c92e937c2d030bfec0241ef955e96f8f86dd908 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 1 Feb 2024 21:41:23 -0500 Subject: [PATCH 11/41] blacked --- bbot/modules/anubisdb.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bbot/modules/anubisdb.py b/bbot/modules/anubisdb.py index ceaed2bbba..9864e3c6d8 100644 --- a/bbot/modules/anubisdb.py +++ b/bbot/modules/anubisdb.py @@ -7,7 +7,9 @@ class anubisdb(subdomain_enum): produced_events = ["DNS_NAME"] meta = {"description": "Query jldc.me's database for subdomains"} options = {"limit": 1000} - options_desc = {"limit": "Limit the number of subdomains returned per query (increasing this may slow the scan due to garbage results from this API)"} + options_desc = { + "limit": "Limit the number of subdomains returned per query (increasing this may slow the scan due to garbage results from this API)" + } base_url = "https://jldc.me/anubis/subdomains" dns_abort_depth = 5 @@ -43,4 +45,4 @@ def parse_results(self, r, query): too_long = self.abort_if_pre(hostname) if in_scope and not is_ptr and not too_long: results.add(hostname) - return sorted(results)[:self.config.get("limit", 1000)] + return sorted(results)[: self.config.get("limit", 1000)] From 43133a1422a162f15c9382698b7fe090b71629ec Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 2 Feb 2024 10:55:17 -0500 Subject: [PATCH 12/41] remove custom cache, use cachetools --- bbot/core/helpers/async_helpers.py | 9 ++- bbot/core/helpers/cache.py | 83 --------------------------- bbot/core/helpers/dns.py | 9 +-- bbot/core/helpers/helper.py | 2 +- bbot/modules/output/emails.py | 7 ++- bbot/modules/output/subdomains.py | 4 +- bbot/test/test_step_1/test_helpers.py | 14 ----- poetry.lock | 13 ++++- pyproject.toml | 1 + 9 files changed, 32 insertions(+), 110 deletions(-) diff --git a/bbot/core/helpers/async_helpers.py b/bbot/core/helpers/async_helpers.py index 4cc7011615..e73afc5156 100644 --- a/bbot/core/helpers/async_helpers.py +++ b/bbot/core/helpers/async_helpers.py @@ -5,13 +5,12 @@ import threading from datetime import datetime from queue import Queue, Empty +from cachetools import LRUCache from .misc import human_timedelta from contextlib import asynccontextmanager log = logging.getLogger("bbot.core.helpers.async_helpers") -from .cache import CacheDict - class ShuffleQueue(asyncio.Queue): def _put(self, item): @@ -37,15 +36,15 @@ class NamedLock: """ def __init__(self, max_size=1000): - self._cache = CacheDict(max_size=max_size) + self._cache = LRUCache(maxsize=max_size) @asynccontextmanager async def lock(self, name): try: - lock = self._cache.get(name) + lock = self._cache[name] except KeyError: lock = _Lock(name) - self._cache.put(name, lock) + self._cache[name] = lock async with lock: yield diff --git a/bbot/core/helpers/cache.py b/bbot/core/helpers/cache.py index 3eb54daf76..3a70fbd248 100644 --- a/bbot/core/helpers/cache.py +++ b/bbot/core/helpers/cache.py @@ -1,8 +1,6 @@ import os import time import logging -from contextlib import suppress -from collections import OrderedDict from .misc import sha1 @@ -53,84 +51,3 @@ def is_cached(self, key, cache_hrs=24 * 7): def cache_filename(self, key): return self.cache_dir / sha1(key).hexdigest() - - -_sentinel = object() - - -class CacheDict: - """ - Dictionary to store cached values, with a maximum size limit - """ - - def __init__(self, max_size=1000): - self._cache = OrderedDict() - self._max_size = int(max_size) - - def get(self, name, fallback=_sentinel): - name_hash = self._hash(name) - try: - return self._cache[name_hash] - except KeyError: - if fallback is not _sentinel: - return fallback - raise - finally: - with suppress(KeyError): - self._cache.move_to_end(name_hash) - self._truncate() - - def put(self, name, value): - name_hash = self._hash(name) - try: - self._cache[name_hash] = value - finally: - with suppress(KeyError): - self._cache.move_to_end(name_hash) - self._truncate() - - def _truncate(self): - if not self or len(self) <= self._max_size: - return - for nh in list(self._cache.keys()): - try: - del self._cache[nh] - except KeyError: - pass - if not self or len(self) <= self._max_size: - break - - def keys(self): - return self._cache.keys() - - def values(self): - return self._cache.values() - - def items(self): - return self._cache.items() - - def clear(self): - return self._cache.clear() - - def _hash(self, v): - if type(v) == int: - return v - return hash(str(v)) - - def __contains__(self, item): - return self._hash(item) in self._cache - - def __iter__(self): - return iter(self._cache) - - def __getitem__(self, item): - return self.get(item) - - def __setitem__(self, item, value): - self.put(item, value) - - def __bool__(self): - return bool(self._cache) - - def __len__(self): - return len(self._cache) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 74dc43a4af..35cf97c8bd 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -7,6 +7,7 @@ import contextlib import dns.exception import dns.asyncresolver +from cachetools import LRUCache from contextlib import suppress from .regexes import dns_name_regex @@ -64,8 +65,8 @@ class DNSHelper: wildcard_ignore (tuple): Domains to be ignored during wildcard detection. wildcard_tests (int): Number of tests to be run for wildcard detection. Defaults to 5. _wildcard_cache (dict): Cache for wildcard detection results. - _dns_cache (CacheDict): Cache for DNS resolution results, limited in size. - _event_cache (CacheDict): Cache for event resolution results, tags. Limited in size. + _dns_cache (LRUCache): Cache for DNS resolution results, limited in size. + _event_cache (LRUCache): Cache for event resolution results, tags. Limited in size. resolver_file (Path): File containing system's current resolver nameservers. filter_bad_ptrs (bool): Whether to filter out DNS names that appear to be auto-generated PTR records. Defaults to True. @@ -130,8 +131,8 @@ def __init__(self, parent_helper): self.fallback_nameservers_file = self.parent_helper.wordlist_dir / "nameservers.txt" self._debug = self.parent_helper.config.get("dns_debug", False) self._dummy_modules = dict() - self._dns_cache = self.parent_helper.CacheDict(max_size=100000) - self._event_cache = self.parent_helper.CacheDict(max_size=10000) + self._dns_cache = LRUCache(maxsize=10000) + self._event_cache = LRUCache(maxsize=10000) self._event_cache_locks = NamedLock() # for mocking DNS queries diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py index 36c6346c93..899f3ab0bc 100644 --- a/bbot/core/helpers/helper.py +++ b/bbot/core/helpers/helper.py @@ -48,8 +48,8 @@ class ConfigAwareHelper: from . import regexes from . import validators from .files import tempfile, feed_pipe, _feed_pipe, tempfile_tail + from .cache import cache_get, cache_put, cache_filename, is_cached from .command import run, run_live, _spawn_proc, _prepare_command_kwargs - from .cache import cache_get, cache_put, cache_filename, is_cached, CacheDict def __init__(self, config, scan=None): self.config = config diff --git a/bbot/modules/output/emails.py b/bbot/modules/output/emails.py index 029bc5aca7..e96c5d97c4 100644 --- a/bbot/modules/output/emails.py +++ b/bbot/modules/output/emails.py @@ -12,14 +12,19 @@ class Emails(Human): output_filename = "emails.txt" + async def setup(self): + self.emails_written = 0 + return await super().setup() + def _scope_distance_check(self, event): return BaseModule._scope_distance_check(self, event) async def handle_event(self, event): if self.file is not None: + self.emails_written += 1 self.file.write(f"{event.data}\n") self.file.flush() async def report(self): if getattr(self, "_file", None) is not None: - self.info(f"Saved email addresses to {self.output_file}") + self.info(f"Saved {self.emails_written:,} email addresses to {self.output_file}") diff --git a/bbot/modules/output/subdomains.py b/bbot/modules/output/subdomains.py index 49dea2db8f..bfb7174ac8 100644 --- a/bbot/modules/output/subdomains.py +++ b/bbot/modules/output/subdomains.py @@ -15,6 +15,7 @@ class Subdomains(Human): async def setup(self): self.include_unresolved = self.config.get("include_unresolved", False) + self.subdomains_written = 0 return await super().setup() async def filter_event(self, event): @@ -27,9 +28,10 @@ def _scope_distance_check(self, event): async def handle_event(self, event): if self.file is not None: + self.subdomains_written += 1 self.file.write(f"{event.data}\n") self.file.flush() async def report(self): if getattr(self, "_file", None) is not None: - self.info(f"Saved subdomains to {self.output_file}") + self.info(f"Saved {self.subdomains_written:,} subdomains to {self.output_file}") diff --git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py index b972c8561f..ba72d41dca 100644 --- a/bbot/test/test_step_1/test_helpers.py +++ b/bbot/test/test_step_1/test_helpers.py @@ -441,20 +441,6 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https assert helpers.cache_get("string", cache_hrs=24 * 7) is None assert helpers.cache_get("string", cache_hrs=24 * 14) == "wat" - cache_dict = helpers.CacheDict(max_size=10) - cache_dict.put("1", 2) - assert cache_dict["1"] == 2 - assert cache_dict.get("1") == 2 - assert len(cache_dict) == 1 - cache_dict["2"] = 3 - assert cache_dict["2"] == 3 - assert cache_dict.get("2") == 3 - assert len(cache_dict) == 2 - for i in range(20): - cache_dict[str(i)] = i + 1 - assert len(cache_dict) == 10 - assert tuple(cache_dict) == tuple(hash(str(x)) for x in range(10, 20)) - test_file = Path(scan.config["home"]) / "testfile.asdf" with open(test_file, "w") as f: for i in range(100): diff --git a/poetry.lock b/poetry.lock index d1204c6702..3a7c834a5f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -186,6 +186,17 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "cachetools" +version = "5.3.2" +description = "Extensible memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +files = [ + {file = "cachetools-5.3.2-py3-none-any.whl", hash = "sha256:861f35a13a451f94e301ce2bec7cac63e881232ccce7ed67fab9b5df4d3beaa1"}, + {file = "cachetools-5.3.2.tar.gz", hash = "sha256:086ee420196f7b2ab9ca2db2520aca326318b68fe5ba8bc4d49cca91add450f2"}, +] + [[package]] name = "certifi" version = "2023.11.17" @@ -2409,4 +2420,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "8d9864610f54050aec62bf75415e5b683a851323d054a38ff36e54d9d5c284e3" +content-hash = "4eb296ea314405bf39920f67d20eebb13cc8974254fd1643538bcb3a338976d2" diff --git a/pyproject.toml b/pyproject.toml index 93172c0609..7d1b2fb327 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ pydantic = "^2.4.2" httpx = "^0.26.0" cloudcheck = "^2.1.0.181" tldextract = "^5.1.1" +cachetools = "^5.3.2" [tool.poetry.group.dev.dependencies] flake8 = "^6.0.0" From d2fbaf52079f90eb636ff3449c840eaf531bca90 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 2 Feb 2024 14:58:30 -0500 Subject: [PATCH 13/41] increase max dnscommonsrv handlers, small masscan bugfix --- bbot/modules/dnscommonsrv.py | 2 +- bbot/modules/masscan.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/bbot/modules/dnscommonsrv.py b/bbot/modules/dnscommonsrv.py index 958b6b6127..4d7e09e4b7 100644 --- a/bbot/modules/dnscommonsrv.py +++ b/bbot/modules/dnscommonsrv.py @@ -94,7 +94,7 @@ class dnscommonsrv(BaseModule): produced_events = ["DNS_NAME"] flags = ["subdomain-enum", "passive", "safe"] meta = {"description": "Check for common SRV records"} - _max_event_handlers = 5 + _max_event_handlers = 10 async def filter_event(self, event): # skip SRV wildcards diff --git a/bbot/modules/masscan.py b/bbot/modules/masscan.py index 15881d5b2f..895ffe2431 100644 --- a/bbot/modules/masscan.py +++ b/bbot/modules/masscan.py @@ -241,9 +241,11 @@ async def emit_from_cache(self): await self.emit_event(line, "OPEN_TCP_PORT", source=source_event) def get_source_event(self, host): - source_event = self.scan.whitelist.get(host) + source_event = self.scan.target.get(host) if source_event is None: - source_event = self.scan.root_event + source_event = self.scan.whitelist.get(host) + if source_event is None: + source_event = self.scan.root_event return source_event async def cleanup(self): From 2958a3b957db43bdfdbee95622ad36ca1f48a532 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sat, 3 Feb 2024 17:33:05 -0500 Subject: [PATCH 14/41] massdns speed optimizations --- bbot/modules/massdns.py | 73 +++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index 965909d317..1bce928def 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -1,6 +1,7 @@ import re import json import random +import asyncio import subprocess from bbot.modules.templates.subdomain_enum import subdomain_enum @@ -13,8 +14,8 @@ class massdns(subdomain_enum): It uses massdns to brute-force subdomains. At the end of a scan, it will leverage BBOT's word cloud to recursively discover target-specific subdomain mutations. - Each subdomain discovered via mutations is tagged with the "mutation" tag. This tag includes the depth at which - the mutations is found. I.e. the first mutation will be tagged "mutation-1". The second one (a mutation of a + Each subdomain discovered via mutations is tagged with the "mutation" tag. This tag indicates the depth at which + the mutation was found. I.e. the first mutation will be tagged "mutation-1". The second one (a mutation of a mutation) will be "mutation-2". Mutations of mutations of mutations will be "mutation-3", etc. This is especially use for bug bounties because it enables you to recognize distant/rare subdomains at a glance. @@ -97,7 +98,10 @@ async def setup(self): cache_hrs=24 * 7, ) self.devops_mutations = list(self.helpers.word_cloud.devops_mutations) - self._mutation_run = 1 + self.mutation_run = 1 + + self.resolve_and_emit_queue = asyncio.Queue() + self.resolve_and_emit_task = asyncio.create_task(self.resolve_and_emit()) return await super().setup() async def filter_event(self, event): @@ -116,23 +120,19 @@ async def filter_event(self, event): async def handle_event(self, event): query = self.make_query(event) self.source_events.add_target(event) - self.info(f"Brute-forcing subdomains for {query} (source: {event.data})") - for hostname in await self.massdns(query, self.subdomain_list): - await self.emit_result(hostname, event, query) + results = await self.massdns(query, self.subdomain_list) + await self.resolve_and_emit_queue.put((results, event, None)) def abort_if(self, event): if not event.scope_distance == 0: return True, "event is not in scope" if "wildcard" in event.tags: return True, "event is a wildcard" - - async def emit_result(self, result, source_event, query, tags=None): - if not result == source_event: - kwargs = {"abort_if": self.abort_if} - if tags is not None: - kwargs["tags"] = tags - await self.emit_event(result, "DNS_NAME", source_event, **kwargs) + if "unresolved" in event.tags: + self.critical(f"{event} IS UNRESOLVED") + return True, "event is unresolved" + return False, "" def already_processed(self, hostname): if hash(hostname) in self.processed: @@ -204,12 +204,36 @@ async def massdns(self, domain, subdomains): ) # everything checks out - self.verbose(f"Resolving batch of {len(results):,} results") - resolved = dict([l async for l in self.helpers.resolve_batch(results, type=("A", "CNAME"))]) - resolved = {k: v for k, v in resolved.items() if v} - for hostname in resolved: - self.add_found(hostname) - return list(resolved) + return results + + async def resolve_and_emit(self): + """ + When results are found, they are placed into self.resolve_and_emit_queue. + The purpose of this function (which is started as a task in the module's setup()) is to consume results from + the queue, resolve them, and if they resolve, emit them. + + This exists to prevent disrupting the scan with huge batches of DNS resolutions. + """ + while 1: + results, source_event, tags = await self.resolve_and_emit_queue.get() + self.verbose(f"Resolving batch of {len(results):,} results") + async with self._task_counter.count(f"{self.name}.resolve_and_emit()"): + async for hostname, r in self.helpers.resolve_batch(results, type=("A", "CNAME")): + if not r: + self.debug(f"Discarding {hostname} because it didn't resolve") + continue + self.add_found(hostname) + if source_event is None: + source_event = self.source_events.get(hostname) + if source_event is None: + self.warning(f"Could not correlate source event from: {hostname}") + source_event = self.scan.root_event + kwargs = {"abort_if": self.abort_if, "tags": tags} + await self.emit_event(hostname, "DNS_NAME", source_event, **kwargs) + + @property + def running(self): + return super().running or self.resolve_and_emit_queue.qsize() > 0 async def _canary_check(self, domain, num_checks=50): random_subdomains = list(self.gen_random_subdomains(num_checks)) @@ -374,15 +398,8 @@ def add_mutation(_domain_hash, m): if mutations: self.info(f"Trying {len(mutations):,} mutations against {domain} ({i+1}/{len(found)})") results = list(await self.massdns(query, mutations)) - for hostname in results: - source_event = self.source_events.get(hostname) - if source_event is None: - self.warning(f"Could not correlate source event from: {hostname}") - source_event = self.scan.root_event - await self.emit_result( - hostname, source_event, query, tags=[f"mutation-{self._mutation_run}"] - ) if results: + await self.resolve_and_emit_queue.put((results, None, [f"mutation-{self.mutation_run}"])) found_mutations = True continue break @@ -390,7 +407,7 @@ def add_mutation(_domain_hash, m): self.warning(e) if found_mutations: - self._mutation_run += 1 + self.mutation_run += 1 def add_found(self, host): if not isinstance(host, str): From fd984cd15920920f73dc9af717f536d189e6a74f Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sat, 3 Feb 2024 18:18:31 -0500 Subject: [PATCH 15/41] dnscommonsrv rework, spellchecking --- bbot/core/helpers/async_helpers.py | 2 +- bbot/core/helpers/depsinstaller/installer.py | 4 +- bbot/core/helpers/misc.py | 14 +- bbot/core/helpers/ntlm.py | 8 +- bbot/core/helpers/web.py | 2 +- bbot/modules/base.py | 2 +- bbot/modules/deadly/ffuf.py | 2 +- bbot/modules/deadly/nuclei.py | 14 +- bbot/modules/dnscommonsrv.py | 246 +++++++++++------- bbot/modules/ffuf_shortnames.py | 22 +- bbot/modules/github_codesearch.py | 4 +- bbot/test/test_step_1/test_helpers.py | 9 + bbot/test/test_step_1/test_web.py | 2 +- .../test_module_ffuf_shortnames.py | 18 +- .../module_tests/test_module_telerik.py | 2 +- 15 files changed, 219 insertions(+), 132 deletions(-) diff --git a/bbot/core/helpers/async_helpers.py b/bbot/core/helpers/async_helpers.py index e73afc5156..8434ccb0f7 100644 --- a/bbot/core/helpers/async_helpers.py +++ b/bbot/core/helpers/async_helpers.py @@ -31,7 +31,7 @@ class NamedLock: """ Returns a unique asyncio.Lock() based on a provided string - Useful for preventing multiple operations from occuring on the same data in parallel + Useful for preventing multiple operations from occurring on the same data in parallel E.g. simultaneous DNS lookups on the same hostname """ diff --git a/bbot/core/helpers/depsinstaller/installer.py b/bbot/core/helpers/depsinstaller/installer.py index 00662b969d..049baef864 100644 --- a/bbot/core/helpers/depsinstaller/installer.py +++ b/bbot/core/helpers/depsinstaller/installer.py @@ -157,9 +157,9 @@ async def pip_install(self, packages, constraints=None): command = [sys.executable, "-m", "pip", "install", "--upgrade"] + packages if constraints: - contraints_tempfile = self.parent_helper.tempfile(constraints, pipe=False) + constraints_tempfile = self.parent_helper.tempfile(constraints, pipe=False) command.append("--constraint") - command.append(contraints_tempfile) + command.append(constraints_tempfile) process = None try: diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index 283bc37828..d02f50584a 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -67,8 +67,11 @@ def is_domain(d): """ d, _ = split_host_port(d) extracted = tldextract(d) - if extracted.domain and not extracted.subdomain: - return True + if extracted.registered_domain: + if not extracted.subdomain: + return True + else: + return d.count(".") == 1 return False @@ -97,8 +100,11 @@ def is_subdomain(d): """ d, _ = split_host_port(d) extracted = tldextract(d) - if extracted.domain and extracted.subdomain: - return True + if extracted.registered_domain: + if extracted.subdomain: + return True + else: + return d.count(".") > 1 return False diff --git a/bbot/core/helpers/ntlm.py b/bbot/core/helpers/ntlm.py index e4d9cd1ca0..8605ef34af 100644 --- a/bbot/core/helpers/ntlm.py +++ b/bbot/core/helpers/ntlm.py @@ -38,7 +38,7 @@ def __init__(self, pos_tup, raw): def decode_ntlm_challenge(st): hdr_tup = struct.unpack(" Date: Sat, 3 Feb 2024 18:29:21 -0500 Subject: [PATCH 16/41] fix tests --- bbot/core/helpers/misc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index d02f50584a..43fd0595f1 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -66,6 +66,8 @@ def is_domain(d): - Port, if present in input, is ignored. """ d, _ = split_host_port(d) + if is_ip(d): + return False extracted = tldextract(d) if extracted.registered_domain: if not extracted.subdomain: @@ -99,6 +101,8 @@ def is_subdomain(d): - Port, if present in input, is ignored. """ d, _ = split_host_port(d) + if is_ip(d): + return False extracted = tldextract(d) if extracted.registered_domain: if extracted.subdomain: @@ -607,9 +611,6 @@ def is_ip(d, version=None): >>> is_ip('evilcorp.com') False """ - if isinstance(d, (ipaddress.IPv4Address, ipaddress.IPv6Address)): - if version is None or version == d.version: - return True try: ip = ipaddress.ip_address(d) if version is None or ip.version == version: From d11ad068b9465a0347b4e19de990727c143c291b Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sat, 3 Feb 2024 18:54:31 -0500 Subject: [PATCH 17/41] fix scope accuracy tests --- .../test_step_1/test_manager_scope_accuracy.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index e8e5da391e..a927da8a3b 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -681,7 +681,7 @@ def custom_setup(scan): assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) assert 0 == len([e for e in events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal"]) - assert len(all_events) == 14 + assert len(all_events) == 13 assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 0]) assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == False and e.scope_distance == 0]) @@ -692,9 +692,8 @@ def custom_setup(scan): assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) assert 1 == len([e for e in all_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "speculate"]) assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) - assert len(all_events_nodups) == 12 + assert len(all_events_nodups) == 11 assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == False and e.scope_distance == 0]) @@ -705,7 +704,6 @@ def custom_setup(scan): assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "speculate"]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 6 @@ -719,7 +717,6 @@ def custom_setup(scan): assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999"]) assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal"]) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) - assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal"]) # sslcert with out-of-scope chain events, all_events, all_events_nodups, graph_output_events, graph_output_batch_events = await do_scan( @@ -739,9 +736,8 @@ def custom_setup(scan): assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) - assert 0 == len([e for e in events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal"]) - assert len(all_events) == 12 + assert len(all_events) == 11 assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 1]) assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 1]) @@ -750,9 +746,8 @@ def custom_setup(scan): assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) - assert len(all_events_nodups) == 10 + assert len(all_events_nodups) == 9 assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 1]) @@ -761,7 +756,6 @@ def custom_setup(scan): assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 5 @@ -773,7 +767,6 @@ def custom_setup(scan): assert 1 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) assert 0 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal"]) assert 0 == len([e for e in graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) - assert 0 == len([e for e in graph_output_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal"]) @pytest.mark.asyncio From 5da0e431d9c682fbb3f7f7705acf004c26842fc9 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sat, 3 Feb 2024 19:44:26 -0500 Subject: [PATCH 18/41] just telerik things --- bbot/test/test_step_2/module_tests/test_module_telerik.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_telerik.py b/bbot/test/test_step_2/module_tests/test_module_telerik.py index 21c4d2b86b..98c511f2ab 100644 --- a/bbot/test/test_step_2/module_tests/test_module_telerik.py +++ b/bbot/test/test_step_2/module_tests/test_module_telerik.py @@ -11,7 +11,7 @@ async def setup_before_prep(self, module_test): # Simulate Telerik.Web.UI.WebResource.axd?type=rau detection expect_args = {"method": "GET", "uri": "/Telerik.Web.UI.WebResource.axd", "query_string": "type=rau"} respond_args = { - "response_data": '{ "message" : "RadAsyncUpload handler is registered successfully, however, it may not be accessed directly." }' + "response_data": '{ "message" : "RadAsyncUpload handler is registered succesfully, however, it may not be accessed directly." }' } module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) From 834ed0c21218c2b59aa177943fec48974efa3290 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sat, 3 Feb 2024 21:32:20 -0500 Subject: [PATCH 19/41] increase dnscommonsrv threads --- bbot/modules/dnscommonsrv.py | 4 ++-- bbot/modules/sitedossier.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bbot/modules/dnscommonsrv.py b/bbot/modules/dnscommonsrv.py index 4500e7fc0e..eef8e2d8ca 100644 --- a/bbot/modules/dnscommonsrv.py +++ b/bbot/modules/dnscommonsrv.py @@ -154,12 +154,12 @@ class dnscommonsrv(BaseModule): produced_events = ["DNS_NAME"] flags = ["subdomain-enum", "passive", "safe"] meta = {"description": "Check for common SRV records"} - options = {"top": 50, "max_event_handlers": 5} + options = {"top": 50, "max_event_handlers": 10} options_desc = { "top": "How many of the top SRV records to check", "max_event_handlers": "How many instances of the module to run concurrently", } - _max_event_handlers = 5 + _max_event_handlers = 10 def _incoming_dedup_hash(self, event): # dedupe by parent diff --git a/bbot/modules/sitedossier.py b/bbot/modules/sitedossier.py index 0c797296af..86872c0523 100644 --- a/bbot/modules/sitedossier.py +++ b/bbot/modules/sitedossier.py @@ -43,5 +43,5 @@ async def query(self, query, parse_fn=None, request_fn=None): results.add(hostname) yield hostname if ' Date: Sat, 3 Feb 2024 22:09:59 -0500 Subject: [PATCH 20/41] limit massdns brute force depth --- bbot/core/helpers/misc.py | 17 +++++++++++++++++ bbot/modules/internetdb.py | 3 +-- bbot/modules/massdns.py | 16 +++++++++++++++- bbot/test/test_step_1/test_helpers.py | 6 ++++++ 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index 43fd0595f1..aa3b645e07 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -337,6 +337,23 @@ def domain_parents(d, include_self=False): break +def subdomain_depth(d): + """ + Calculate the depth of subdomains within a given domain name. + + Args: + d (str): The domain name to analyze. + + Returns: + int: The depth of the subdomain. For example, a hostname "5.4.3.2.1.evilcorp.com" + has a subdomain depth of 5. + """ + subdomain, domain = split_domain(d) + if not subdomain: + return 0 + return subdomain.count(".") + 1 + + def parent_url(u): """ Retrieve the parent URL of a given URL. diff --git a/bbot/modules/internetdb.py b/bbot/modules/internetdb.py index b3e98b9fca..cc6bde5758 100644 --- a/bbot/modules/internetdb.py +++ b/bbot/modules/internetdb.py @@ -40,8 +40,7 @@ class internetdb(BaseModule): flags = ["passive", "safe", "portscan", "subdomain-enum"] meta = {"description": "Query Shodan's InternetDB for open ports, hostnames, technologies, and vulnerabilities"} - # limit outgoing queue size to help avoid rate limiting - _qsize = 100 + _qsize = 500 base_url = "https://internetdb.shodan.io" diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index 1bce928def..ab913128d7 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -30,11 +30,13 @@ class massdns(subdomain_enum): "wordlist": "https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/DNS/subdomains-top1million-5000.txt", "max_resolvers": 1000, "max_mutations": 500, + "max_depth": 5, } options_desc = { "wordlist": "Subdomain wordlist URL", "max_resolvers": "Number of concurrent massdns resolvers", "max_mutations": "Max number of smart mutations per subdomain", + "max_depth": "How many subdomains deep to brute force, i.e. 5.4.3.2.1.evilcorp.com", } subdomain_file = None deps_ansible = [ @@ -90,6 +92,7 @@ async def setup(self): self.max_resolvers = self.config.get("max_resolvers", 1000) self.max_mutations = self.config.get("max_mutations", 500) + self.max_depth = max(1, self.config.get("max_depth", 5)) nameservers_url = ( "https://raw.githubusercontent.com/blacklanternsecurity/public-dns-servers/master/nameservers.txt" ) @@ -107,6 +110,18 @@ async def setup(self): async def filter_event(self, event): query = self.make_query(event) eligible, reason = await self.eligible_for_enumeration(event) + + # limit brute force depth + subdomain_depth = self.helpers.subdomain_depth(query) + 1 + if subdomain_depth > self.max_depth: + eligible = False + reason = f"subdomain depth of *.{event.data} ({subdomain_depth}) > max_depth ({self.max_depth})" + + # don't brute-force things that look like autogenerated PTRs + if self.helpers.is_ptr(query): + eligible = False + reason = f'"{query}" looks like an autogenerated PTR' + if eligible: self.add_found(event) # reject if already processed @@ -130,7 +145,6 @@ def abort_if(self, event): if "wildcard" in event.tags: return True, "event is a wildcard" if "unresolved" in event.tags: - self.critical(f"{event} IS UNRESOLVED") return True, "event is unresolved" return False, "" diff --git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py index a078110ce3..c8045e5958 100644 --- a/bbot/test/test_step_1/test_helpers.py +++ b/bbot/test/test_step_1/test_helpers.py @@ -182,6 +182,12 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https assert helpers.split_domain("192.168.0.1") == ("", "192.168.0.1") assert helpers.split_domain("dead::beef") == ("", "dead::beef") + assert helpers.subdomain_depth("a.s.d.f.evilcorp.co.uk") == 4 + assert helpers.subdomain_depth("a.s.d.f.evilcorp.com") == 4 + assert helpers.subdomain_depth("evilcorp.com") == 0 + assert helpers.subdomain_depth("a.evilcorp.com") == 1 + assert helpers.subdomain_depth("a.s.d.f.evilcorp.notreal") == 4 + assert helpers.split_host_port("https://evilcorp.co.uk") == ("evilcorp.co.uk", 443) assert helpers.split_host_port("http://evilcorp.co.uk:666") == ("evilcorp.co.uk", 666) assert helpers.split_host_port("evilcorp.co.uk:666") == ("evilcorp.co.uk", 666) From cab606df8e3c5b92f390dd172896570d3aac6298 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sat, 3 Feb 2024 22:24:01 -0500 Subject: [PATCH 21/41] small wildcard tweak --- bbot/core/helpers/dns.py | 5 +++-- bbot/modules/massdns.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 35cf97c8bd..6bcac62575 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -849,12 +849,13 @@ async def is_wildcard(self, query, ips=None, rdtype=None): # for every parent domain, starting with the shortest try: for host in parents[::-1]: + # make sure we've checked that domain for wildcards + await self.is_wildcard_domain(host) + # for every rdtype for _rdtype in list(base_query_ips): # get the IPs from above query_ips = base_query_ips.get(_rdtype, set()) - # make sure we've checked that domain for wildcards - await self.is_wildcard_domain(host) host_hash = hash(host) if host_hash in self._wildcard_cache: diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index ab913128d7..caf0b7fab9 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -115,7 +115,7 @@ async def filter_event(self, event): subdomain_depth = self.helpers.subdomain_depth(query) + 1 if subdomain_depth > self.max_depth: eligible = False - reason = f"subdomain depth of *.{event.data} ({subdomain_depth}) > max_depth ({self.max_depth})" + reason = f"subdomain depth of *.{query} ({subdomain_depth}) > max_depth ({self.max_depth})" # don't brute-force things that look like autogenerated PTRs if self.helpers.is_ptr(query): @@ -127,6 +127,7 @@ async def filter_event(self, event): # reject if already processed if self.already_processed(query): return False, f'Query "{query}" was already processed' + if eligible: self.processed.add(hash(query)) return True, reason From bca67d84adb7ba706e55ea9b1117042fe0367eac Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sat, 3 Feb 2024 22:29:59 -0500 Subject: [PATCH 22/41] internetdb speed optimization --- bbot/modules/internetdb.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bbot/modules/internetdb.py b/bbot/modules/internetdb.py index cc6bde5758..5e33f1155c 100644 --- a/bbot/modules/internetdb.py +++ b/bbot/modules/internetdb.py @@ -115,10 +115,11 @@ def get_ip(self, event): elif event.type == "DNS_NAME": # always try IPv4 first ipv6 = [] - for host in event.resolved_hosts: - if self.helpers.is_ip(host, version=4): - return host - elif self.helpers.is_ip(host, version=6): - ipv6.append(host) + ips = [self.helpers.make_ip_type(h) for h in event.resolved_hosts if self.helpers.is_ip(h)] + for ip in sorted(ips): + if self.helpers.is_ip(ip, version=4): + return ip + elif self.helpers.is_ip(ip, version=6): + ipv6.append(ip) for ip in ipv6: return ip From ed36be588ab6a39e14ad29d7f9b3c19b2f392dd2 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sun, 4 Feb 2024 19:06:49 -0500 Subject: [PATCH 23/41] massdns tweaks --- bbot/modules/massdns.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index caf0b7fab9..c71138ef3a 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -158,7 +158,7 @@ async def massdns(self, domain, subdomains): subdomains = list(subdomains) domain_wildcard_rdtypes = set() - for domain, rdtypes in (await self.helpers.is_wildcard_domain(domain)).items(): + for _domain, rdtypes in (await self.helpers.is_wildcard_domain(domain)).items(): for rdtype, results in rdtypes.items(): if results: domain_wildcard_rdtypes.add(rdtype) @@ -385,10 +385,7 @@ def add_mutation(_domain_hash, m): for s in _subdomains: first_segment = s.split(".")[0] # skip stuff with lots of numbers (e.g. PTRs) - digits = self.digit_regex.findall(first_segment) - excessive_digits = len(digits) > 2 - long_digits = any(len(d) > 3 for d in digits) - if excessive_digits or long_digits: + if self.has_excessive_digits(first_segment): continue add_mutation(domain_hash, first_segment) for word in self.helpers.extract_words( @@ -454,3 +451,16 @@ def gen_random_subdomains(self, n=50): yield subdomain for _ in range(5): yield self.helpers.rand_string(length=8, digits=False) + + def has_excessive_digits(self, d): + """ + Identifies dns names with excessive numbers, e.g.: + - w1-2-3.evilcorp.com + - ptr1234.evilcorp.com + """ + digits = self.digit_regex.findall(d) + excessive_digits = len(digits) > 2 + long_digits = any(len(d) > 3 for d in digits) + if excessive_digits or long_digits: + return True + return False From d89cda2f5032990aa4a53c5237598b5a8eaa23d9 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 5 Feb 2024 11:17:13 -0500 Subject: [PATCH 24/41] fix internetdb bug --- bbot/modules/internetdb.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/modules/internetdb.py b/bbot/modules/internetdb.py index 5e33f1155c..847db0c7ad 100644 --- a/bbot/modules/internetdb.py +++ b/bbot/modules/internetdb.py @@ -115,8 +115,8 @@ def get_ip(self, event): elif event.type == "DNS_NAME": # always try IPv4 first ipv6 = [] - ips = [self.helpers.make_ip_type(h) for h in event.resolved_hosts if self.helpers.is_ip(h)] - for ip in sorted(ips): + ips = [h for h in event.resolved_hosts if self.helpers.is_ip(h)] + for ip in sorted([str(ip) for ip in ips]): if self.helpers.is_ip(ip, version=4): return ip elif self.helpers.is_ip(ip, version=6): From 4be75af2155196d66712683939a66dfab5ee02f4 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 5 Feb 2024 14:56:29 -0500 Subject: [PATCH 25/41] fix \s warning --- bbot/core/helpers/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index aa3b645e07..a67e89402d 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -1451,7 +1451,7 @@ def search_dict_values(d, *regexes): ... ] ... } ... } - >>> url_regexes = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+') + >>> url_regexes = re.compile(r'https?://[^\\s<>"]+|www\.[^\\s<>"]+') >>> list(search_dict_values(dict_to_search, url_regexes)) ["https://www.evilcorp.com"] """ From 1c5a23411a6cbe3ac5b5f675b81634b8a77a1577 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 5 Feb 2024 15:38:00 -0500 Subject: [PATCH 26/41] increase massdns qsize --- bbot/modules/massdns.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index c71138ef3a..84e4066286 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -75,7 +75,7 @@ class massdns(subdomain_enum): }, ] reject_wildcards = "strict" - _qsize = 100 + _qsize = 10000 digit_regex = re.compile(r"\d+") From 86c0171099d38411a0cc2202d725abcc12ae2461 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 6 Feb 2024 12:54:29 -0500 Subject: [PATCH 27/41] increase qsize for speculate and excavate --- bbot/modules/internal/excavate.py | 1 + bbot/modules/internal/speculate.py | 1 + 2 files changed, 2 insertions(+) diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index 5ccc8d36a5..e4fe1c4760 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -340,6 +340,7 @@ class excavate(BaseInternalModule): meta = {"description": "Passively extract juicy tidbits from scan data"} scope_distance_modifier = None + _qsize = 10000 async def setup(self): self.csp = CSPExtractor(self) diff --git a/bbot/modules/internal/speculate.py b/bbot/modules/internal/speculate.py index 7aaf12d306..c039b3b1b1 100644 --- a/bbot/modules/internal/speculate.py +++ b/bbot/modules/internal/speculate.py @@ -35,6 +35,7 @@ class speculate(BaseInternalModule): } scope_distance_modifier = 1 _priority = 4 + _qsize = 10000 async def setup(self): scan_modules = [m for m in self.scan.modules.values() if m._type == "scan"] From e02c6865acdcd5cb3415e78492ae9041d74ea799 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 6 Feb 2024 12:59:06 -0500 Subject: [PATCH 28/41] log version command: verbose() --> trace() --- bbot/scanner/scanner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 48833d1509..311c2048c6 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -331,7 +331,7 @@ async def async_start(self): await self._prep() self._start_log_handlers() - log.verbose(f'Ran BBOT {__version__} at {scan_start_time}, command: {" ".join(sys.argv)}') + self.trace(f'Ran BBOT {__version__} at {scan_start_time}, command: {" ".join(sys.argv)}') if not self.target: self.warning(f"No scan targets specified") From e9cb4fdfa6c8688e0322c28b6bc91d22fd83bb58 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 6 Feb 2024 13:17:12 -0500 Subject: [PATCH 29/41] allow independent http/dns debugging (without needing -d) --- bbot/core/helpers/dns.py | 2 +- bbot/core/helpers/web.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 6bcac62575..2d40167545 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -1009,7 +1009,7 @@ def _parse_rdtype(self, t, default=None): def debug(self, *args, **kwargs): if self._debug: - log.debug(*args, **kwargs) + log.trace(*args, **kwargs) def _get_dummy_module(self, name): try: diff --git a/bbot/core/helpers/web.py b/bbot/core/helpers/web.py index 41a755c0e7..27b1c87173 100644 --- a/bbot/core/helpers/web.py +++ b/bbot/core/helpers/web.py @@ -55,7 +55,7 @@ def __init__(self, *args, **kwargs): http_debug = self._bbot_scan.config.get("http_debug", None) if http_debug: - log.debug(f"Creating AsyncClient: {args}, {kwargs}") + log.trace(f"Creating AsyncClient: {args}, {kwargs}") self._persist_cookies = kwargs.pop("persist_cookies", True) @@ -224,10 +224,10 @@ async def request(self, *args, **kwargs): async with self._acatch(url, raise_error): if self.http_debug: logstr = f"Web request: {str(args)}, {str(kwargs)}" - log.debug(logstr) + log.trace(logstr) response = await client.request(*args, **kwargs) if self.http_debug: - log.debug( + log.trace( f"Web response from {url}: {response} (Length: {len(response.content)}) headers: {response.headers}" ) return response From bfef4731f5b37220e4708e623e834cd5ed5f0b33 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 6 Feb 2024 18:01:56 -0500 Subject: [PATCH 30/41] fix trace --- bbot/scanner/scanner.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 311c2048c6..1a74d41a84 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -934,10 +934,13 @@ def error(self, *args, trace=True, **kwargs): if trace: self.trace() - def trace(self): - e_type, e_val, e_traceback = exc_info() - if e_type is not None: - log.trace(traceback.format_exc()) + def trace(self, msg=None): + if msg is None: + e_type, e_val, e_traceback = exc_info() + if e_type is not None: + log.trace(traceback.format_exc()) + else: + log.trace(msg) def critical(self, *args, trace=True, **kwargs): log.critical(*args, extra={"scan_id": self.id}, **kwargs) From 2f788d70fbf0cbdce65ebe66b92598490d8073b1 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 7 Feb 2024 11:30:35 -0500 Subject: [PATCH 31/41] default qsize --> 1000, unlimited qsize for speculate & excavate --- bbot/modules/base.py | 2 +- bbot/modules/internal/excavate.py | 1 - bbot/modules/internal/speculate.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index e5fb355008..3c94ebc570 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -104,7 +104,7 @@ class BaseModule: _preserve_graph = False _stats_exclude = False - _qsize = 100 + _qsize = 1000 _priority = 3 _name = "base" _type = "scan" diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index e4fe1c4760..5ccc8d36a5 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -340,7 +340,6 @@ class excavate(BaseInternalModule): meta = {"description": "Passively extract juicy tidbits from scan data"} scope_distance_modifier = None - _qsize = 10000 async def setup(self): self.csp = CSPExtractor(self) diff --git a/bbot/modules/internal/speculate.py b/bbot/modules/internal/speculate.py index c039b3b1b1..7aaf12d306 100644 --- a/bbot/modules/internal/speculate.py +++ b/bbot/modules/internal/speculate.py @@ -35,7 +35,6 @@ class speculate(BaseInternalModule): } scope_distance_modifier = 1 _priority = 4 - _qsize = 10000 async def setup(self): scan_modules = [m for m in self.scan.modules.values() if m._type == "scan"] From ae25d8a0d532fe31258005b77aca6a36bb289029 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 9 Feb 2024 11:23:17 -0500 Subject: [PATCH 32/41] fix aioconsole bug --- bbot/cli.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/bbot/cli.py b/bbot/cli.py index 9a8ab51dd3..c64d242ff0 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -6,9 +6,9 @@ import asyncio import logging import traceback -from aioconsole import ainput from omegaconf import OmegaConf from contextlib import suppress +from aioconsole.stream import NonFileStreamReader # fix tee buffering sys.stdout.reconfigure(line_buffering=True) @@ -20,6 +20,7 @@ from bbot import __version__ from bbot.modules import module_loader from bbot.core.configurator.args import parser +from bbot.core.helpers.misc import smart_decode from bbot.core.helpers.logger import log_to_stderr from bbot.core.configurator import ensure_config_files, check_cli_args, environ @@ -321,21 +322,28 @@ def handle_keyboard_input(keyboard_input): toggle_log_level(logger=log) scanner.manager.modules_status(_log=True) + reader = NonFileStreamReader(sys.stdin) + async def akeyboard_listen(): - allowed_errors = 10 - while 1: - keyboard_input = "a" - try: - keyboard_input = await ainput() - except Exception: - allowed_errors -= 1 - handle_keyboard_input(keyboard_input) - if allowed_errors <= 0: - break + try: + allowed_errors = 10 + while 1: + keyboard_input = None + try: + keyboard_input = smart_decode((await reader.readline()).strip()) + allowed_errors = 0 + except Exception: + allowed_errors -= 1 + if keyboard_input is not None: + handle_keyboard_input(keyboard_input) + if allowed_errors <= 0: + break + except Exception as e: + log_to_stderr(f"Error in keyboard listen task: {e}", level="ERROR") + log_to_stderr(traceback.format_exc(), level="TRACE") try: keyboard_listen_task = asyncio.create_task(akeyboard_listen()) - await scanner.async_start_without_generator() finally: keyboard_listen_task.cancel() From c15141100bf67dda34e99c9772190a836a3f92fc Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 9 Feb 2024 11:52:37 -0500 Subject: [PATCH 33/41] more aioconsole bugfixing --- bbot/cli.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/bbot/cli.py b/bbot/cli.py index c64d242ff0..bc9e100392 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -8,7 +8,7 @@ import traceback from omegaconf import OmegaConf from contextlib import suppress -from aioconsole.stream import NonFileStreamReader +from aioconsole import stream # fix tee buffering sys.stdout.reconfigure(line_buffering=True) @@ -322,7 +322,13 @@ def handle_keyboard_input(keyboard_input): toggle_log_level(logger=log) scanner.manager.modules_status(_log=True) - reader = NonFileStreamReader(sys.stdin) + log.critical(f"is_pipe_transport_compatible: {stream.is_pipe_transport_compatible(sys.stdin)}") + reader = stream.NonFileStreamReader(sys.stdin) + + # Reader + reader = stream.StandardStreamReader() + protocol = stream.StandardStreamReaderProtocol(reader) + await asyncio.get_event_loop().connect_read_pipe(lambda: protocol, sys.stdin) async def akeyboard_listen(): try: @@ -331,8 +337,10 @@ async def akeyboard_listen(): keyboard_input = None try: keyboard_input = smart_decode((await reader.readline()).strip()) - allowed_errors = 0 - except Exception: + allowed_errors = 10 + except Exception as e: + log_to_stderr(f"Error in keyboard listen loop: {e}", level="TRACE") + log_to_stderr(traceback.format_exc(), level="TRACE") allowed_errors -= 1 if keyboard_input is not None: handle_keyboard_input(keyboard_input) From 381f12a9d1dae6fab338e5fc91bd02b955f50fc2 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 9 Feb 2024 11:56:10 -0500 Subject: [PATCH 34/41] fixed console logic --- bbot/cli.py | 100 ++++++++++++++++++++++++++-------------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/bbot/cli.py b/bbot/cli.py index bc9e100392..679ca24fb1 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -302,60 +302,60 @@ async def _main(): if not options.dry_run: log.trace(f"Command: {' '.join(sys.argv)}") - if not options.agent_mode and not options.yes and sys.stdin.isatty(): - log.hugesuccess(f"Scan ready. Press enter to execute {scanner.name}") - input() - - def handle_keyboard_input(keyboard_input): - kill_regex = re.compile(r"kill (?P[a-z0-9_]+)") - if keyboard_input: - log.verbose(f'Got keyboard input: "{keyboard_input}"') - kill_match = kill_regex.match(keyboard_input) - if kill_match: - module = kill_match.group("module") - if module in scanner.modules: - log.hugewarning(f'Killing module: "{module}"') - scanner.manager.kill_module(module, message="killed by user") - else: - log.warning(f'Invalid module: "{module}"') - else: - toggle_log_level(logger=log) - scanner.manager.modules_status(_log=True) - - log.critical(f"is_pipe_transport_compatible: {stream.is_pipe_transport_compatible(sys.stdin)}") - reader = stream.NonFileStreamReader(sys.stdin) - - # Reader - reader = stream.StandardStreamReader() - protocol = stream.StandardStreamReaderProtocol(reader) - await asyncio.get_event_loop().connect_read_pipe(lambda: protocol, sys.stdin) - - async def akeyboard_listen(): - try: - allowed_errors = 10 - while 1: - keyboard_input = None - try: - keyboard_input = smart_decode((await reader.readline()).strip()) - allowed_errors = 10 - except Exception as e: - log_to_stderr(f"Error in keyboard listen loop: {e}", level="TRACE") - log_to_stderr(traceback.format_exc(), level="TRACE") - allowed_errors -= 1 - if keyboard_input is not None: - handle_keyboard_input(keyboard_input) - if allowed_errors <= 0: - break - except Exception as e: - log_to_stderr(f"Error in keyboard listen task: {e}", level="ERROR") - log_to_stderr(traceback.format_exc(), level="TRACE") - try: + if sys.stdin.isatty(): + if not options.agent_mode and not options.yes: + log.hugesuccess(f"Scan ready. Press enter to execute {scanner.name}") + input() + + def handle_keyboard_input(keyboard_input): + kill_regex = re.compile(r"kill (?P[a-z0-9_]+)") + if keyboard_input: + log.verbose(f'Got keyboard input: "{keyboard_input}"') + kill_match = kill_regex.match(keyboard_input) + if kill_match: + module = kill_match.group("module") + if module in scanner.modules: + log.hugewarning(f'Killing module: "{module}"') + scanner.manager.kill_module(module, message="killed by user") + else: + log.warning(f'Invalid module: "{module}"') + else: + toggle_log_level(logger=log) + scanner.manager.modules_status(_log=True) + + # Reader + reader = stream.StandardStreamReader() + protocol = stream.StandardStreamReaderProtocol(reader) + await asyncio.get_event_loop().connect_read_pipe(lambda: protocol, sys.stdin) + + async def akeyboard_listen(): + try: + allowed_errors = 10 + while 1: + keyboard_input = None + try: + keyboard_input = smart_decode((await reader.readline()).strip()) + allowed_errors = 10 + except Exception as e: + log_to_stderr(f"Error in keyboard listen loop: {e}", level="TRACE") + log_to_stderr(traceback.format_exc(), level="TRACE") + allowed_errors -= 1 + if keyboard_input is not None: + handle_keyboard_input(keyboard_input) + if allowed_errors <= 0: + break + except Exception as e: + log_to_stderr(f"Error in keyboard listen task: {e}", level="ERROR") + log_to_stderr(traceback.format_exc(), level="TRACE") + keyboard_listen_task = asyncio.create_task(akeyboard_listen()) + + try: await scanner.async_start_without_generator() finally: - keyboard_listen_task.cancel() - with suppress(asyncio.CancelledError): + with suppress(Exception): + keyboard_listen_task.cancel() await keyboard_listen_task except bbot.core.errors.ScanError as e: From 94981175703a994bc5443cbe2a0cbdbce2ba1f0d Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 9 Feb 2024 12:14:46 -0500 Subject: [PATCH 35/41] remove unneeded cancel logic --- bbot/cli.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/bbot/cli.py b/bbot/cli.py index 679ca24fb1..e480b3d4ec 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -351,12 +351,7 @@ async def akeyboard_listen(): keyboard_listen_task = asyncio.create_task(akeyboard_listen()) - try: - await scanner.async_start_without_generator() - finally: - with suppress(Exception): - keyboard_listen_task.cancel() - await keyboard_listen_task + await scanner.async_start_without_generator() except bbot.core.errors.ScanError as e: log_to_stderr(str(e), level="ERROR") From a1d4f0d445abcdaa1942ecd770cbb42fbb246136 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 9 Feb 2024 13:34:08 -0500 Subject: [PATCH 36/41] flaked --- bbot/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/cli.py b/bbot/cli.py index e480b3d4ec..e7e4400928 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -349,7 +349,7 @@ async def akeyboard_listen(): log_to_stderr(f"Error in keyboard listen task: {e}", level="ERROR") log_to_stderr(traceback.format_exc(), level="TRACE") - keyboard_listen_task = asyncio.create_task(akeyboard_listen()) + asyncio.create_task(akeyboard_listen()) await scanner.async_start_without_generator() From 7ec6e9ff7776d35883f28f538ef44c31bed40a57 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 12 Feb 2024 17:07:24 -0500 Subject: [PATCH 37/41] debug --> verbose for batch event handling --- bbot/modules/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index 3c94ebc570..d888bf6999 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -358,12 +358,12 @@ async def _handle_batch(self): events, finish = await self._events_waiting() if events and not self.errored: counter.n = len(events) - self.debug(f"Handling batch of {len(events):,} events") + self.verbose(f"Handling batch of {len(events):,} events") submitted = True async with self.scan._acatch(f"{self.name}.handle_batch()"): handle_batch_task = asyncio.create_task(self.handle_batch(*events)) await handle_batch_task - self.debug(f"Finished handling batch of {len(events):,} events") + self.verbose(f"Finished handling batch of {len(events):,} events") if finish: context = f"{self.name}.finish()" async with self.scan._acatch(context), self._task_counter.count(context): From 07eea207443551656f6f2316c4fc04b6451b4a12 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 12 Feb 2024 17:08:56 -0500 Subject: [PATCH 38/41] keyboard listen logic fix --- bbot/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/cli.py b/bbot/cli.py index e7e4400928..275d156ec6 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -343,8 +343,8 @@ async def akeyboard_listen(): allowed_errors -= 1 if keyboard_input is not None: handle_keyboard_input(keyboard_input) - if allowed_errors <= 0: - break + if allowed_errors <= 0: + break except Exception as e: log_to_stderr(f"Error in keyboard listen task: {e}", level="ERROR") log_to_stderr(traceback.format_exc(), level="TRACE") From 29dce2f64021a25d66a225d94a7d1b652caa5a63 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 12 Feb 2024 17:31:45 -0500 Subject: [PATCH 39/41] debug massdns mutations --- bbot/modules/massdns.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index 84e4066286..1a19e9ac80 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -378,6 +378,9 @@ def add_mutation(_domain_hash, m): self.mutations_tried.add(h) mutations.add(m) + num_base_mutations = len(base_mutations) + self.critical(f"base mutations: {num_base_mutations}") + # try every subdomain everywhere else for _domain, _subdomains in found: if _domain == domain: @@ -393,6 +396,9 @@ def add_mutation(_domain_hash, m): ): add_mutation(domain_hash, word) + num_massdns_mutations = len(mutations) - num_base_mutations + self.critical(f"added by massdns: {num_massdns_mutations}") + # numbers + devops mutations for mutation in self.helpers.word_cloud.mutations( subdomains, cloud=False, numbers=3, number_padding=1 @@ -401,12 +407,19 @@ def add_mutation(_domain_hash, m): m = delimiter.join(mutation).lower() add_mutation(domain_hash, m) + num_devops = len(mutations) - num_massdns_mutations + self.critical(f"added by word cloud: {num_devops}") + # special dns mutator + self.critical(f"dns_mutator size: {len(self.helpers.word_cloud.dns_mutator)} (max: {self.max_mutations})") for subdomain in self.helpers.word_cloud.dns_mutator.mutations( subdomains, max_mutations=self.max_mutations ): add_mutation(domain_hash, subdomain) + num_mutations = len(mutations) - num_devops + self.critical(f"added by dns mutator: {num_mutations}") + if mutations: self.info(f"Trying {len(mutations):,} mutations against {domain} ({i+1}/{len(found)})") results = list(await self.massdns(query, mutations)) From ce95ce8f078bd5b674e86527b61bd6f48d8bee40 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 13 Feb 2024 13:17:09 -0500 Subject: [PATCH 40/41] add debug statements for mutations --- bbot/modules/massdns.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index 1a19e9ac80..02ff1aa85c 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -379,7 +379,7 @@ def add_mutation(_domain_hash, m): mutations.add(m) num_base_mutations = len(base_mutations) - self.critical(f"base mutations: {num_base_mutations}") + self.debug(f"Base mutations for {domain}: {num_base_mutations:,}") # try every subdomain everywhere else for _domain, _subdomains in found: @@ -397,7 +397,7 @@ def add_mutation(_domain_hash, m): add_mutation(domain_hash, word) num_massdns_mutations = len(mutations) - num_base_mutations - self.critical(f"added by massdns: {num_massdns_mutations}") + self.debug(f"Mutations from previous subdomains for {domain}: {num_massdns_mutations:,}") # numbers + devops mutations for mutation in self.helpers.word_cloud.mutations( @@ -407,18 +407,20 @@ def add_mutation(_domain_hash, m): m = delimiter.join(mutation).lower() add_mutation(domain_hash, m) - num_devops = len(mutations) - num_massdns_mutations - self.critical(f"added by word cloud: {num_devops}") + num_word_cloud_mutations = len(mutations) - num_massdns_mutations + self.debug(f"Mutations added by word cloud for {domain}: {num_word_cloud_mutations:,}") # special dns mutator - self.critical(f"dns_mutator size: {len(self.helpers.word_cloud.dns_mutator)} (max: {self.max_mutations})") + self.debug( + f"DNS Mutator size: {len(self.helpers.word_cloud.dns_mutator):,} (limited to {self.max_mutations:,})" + ) for subdomain in self.helpers.word_cloud.dns_mutator.mutations( subdomains, max_mutations=self.max_mutations ): add_mutation(domain_hash, subdomain) - num_mutations = len(mutations) - num_devops - self.critical(f"added by dns mutator: {num_mutations}") + num_mutations = len(mutations) - num_word_cloud_mutations + self.debug(f"Mutations added by DNS Mutator: {num_mutations:,}") if mutations: self.info(f"Trying {len(mutations):,} mutations against {domain} ({i+1}/{len(found)})") From 28f24ace780e1659ab4e05dc4f1a6242d79da963 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 14 Feb 2024 11:50:14 -0500 Subject: [PATCH 41/41] restore wildcard rdtype optimization --- bbot/core/helpers/dns.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 2d40167545..1c7e5187d6 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -938,7 +938,7 @@ async def is_wildcard_domain(self, domain, log_info=False): # resolve a bunch of random subdomains of the same parent is_wildcard = False wildcard_results = dict() - for rdtype in rdtypes_to_check: + for rdtype in list(rdtypes_to_check): # continue if a wildcard was already found for this rdtype # if rdtype in self._wildcard_cache[host_hash]: # continue @@ -952,6 +952,8 @@ async def is_wildcard_domain(self, domain, log_info=False): wildcard_results[rdtype].update(results) # we know this rdtype is a wildcard # so we don't need to check it anymore + with suppress(KeyError): + rdtypes_to_check.remove(rdtype) self._wildcard_cache.update({host_hash: wildcard_results}) wildcard_domain_results.update({host: wildcard_results})