diff --git a/README.md b/README.md index 94c5e189f..51e7a5300 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ config: -BBOT consistently finds 20-50% more subdomains than other tools. The bigger the domain, the bigger the difference. To learn how this is possible, see [How It Works](https://www.blacklanternsecurity.com/bbot/Stable/how_it_works/). +BBOT consistently finds 20-50% more subdomains than other tools. The bigger the domain, the bigger the difference. To learn how this is possible, see [How It Works](https://www.blacklanternsecurity.com/bbot/Dev/how_it_works/). ![subdomain-stats-ebay](https://github.com/blacklanternsecurity/bbot/assets/20261699/de3e7f21-6f52-4ac4-8eab-367296cd385f) @@ -394,7 +394,7 @@ Thanks to these amazing people for contributing to BBOT! :heart: Special thanks to: -- @TheTechromancer for creating [BBOT](https://github.com/blacklanternsecurity/bbot) +- @TheTechromancer for creating BBOT - @liquidsec for his extensive work on BBOT's web hacking features, including [badsecrets](https://github.com/blacklanternsecurity/badsecrets) and [baddns](https://github.com/blacklanternsecurity/baddns) - Steve Micallef (@smicallef) for creating Spiderfoot - @kerrymilan for his Neo4j and Ansible expertise diff --git a/bbot/cli.py b/bbot/cli.py index 47b2c29ad..877f2bcaa 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -24,13 +24,6 @@ www.blacklanternsecurity.com/bbot """ print(ascii_art, file=sys.stderr) - log_to_stderr( - "This is a pre-release of BBOT 2.0. If you upgraded from version 1, we recommend cleaning your old configs etc. before running this version!", - level="WARNING", - ) - log_to_stderr( - "For details, see https://github.com/blacklanternsecurity/bbot/discussions/1540", level="WARNING" - ) scan_name = "" diff --git a/bbot/core/engine.py b/bbot/core/engine.py index 52d4b871a..70652d456 100644 --- a/bbot/core/engine.py +++ b/bbot/core/engine.py @@ -62,15 +62,21 @@ def unpickle(self, binary): return error_sentinel async def _infinite_retry(self, callback, *args, **kwargs): - interval = kwargs.pop("_interval", 10) + interval = kwargs.pop("_interval", 15) context = kwargs.pop("_context", "") + # default overall timeout of 5 minutes (15 second interval * 20 iterations) + max_retries = kwargs.pop("_max_retries", 4 * 5) if not context: context = f"{callback.__name__}({args}, {kwargs})" + retries = 0 while not self._shutdown_status: try: return await asyncio.wait_for(callback(*args, **kwargs), timeout=interval) - except (TimeoutError, asyncio.TimeoutError): - self.log.debug(f"{self.name}: Timeout waiting for response for {context}, retrying...") + except (TimeoutError, asyncio.exceptions.TimeoutError): + self.log.debug(f"{self.name}: Timeout after {interval:,} seconds{context}, retrying...") + retries += 1 + if max_retries is not None and retries > max_retries: + raise TimeoutError(f"Timed out after {max_retries*interval:,} seconds {context}") class EngineClient(EngineBase): @@ -205,7 +211,9 @@ async def send_cancel_message(self, socket, context): message = pickle.dumps({"c": -1}) await self._infinite_retry(socket.send, message) while 1: - response = await self._infinite_retry(socket.recv, _context=f"waiting for CANCEL_OK from {context}") + response = await self._infinite_retry( + socket.recv, _context=f"waiting for CANCEL_OK from {context}", _max_retries=4 + ) response = pickle.loads(response) if isinstance(response, dict): response = response.get("m", "") @@ -216,9 +224,9 @@ async def send_shutdown_message(self): async with self.new_socket() as socket: # -99 == special shutdown message message = pickle.dumps({"c": -99}) - with suppress(TimeoutError, asyncio.TimeoutError): + with suppress(TimeoutError, asyncio.exceptions.TimeoutError): await asyncio.wait_for(socket.send(message), 0.5) - with suppress(TimeoutError, asyncio.TimeoutError): + with suppress(TimeoutError, asyncio.exceptions.TimeoutError): while 1: response = await asyncio.wait_for(socket.recv(), 0.5) response = pickle.loads(response) @@ -390,18 +398,21 @@ async def run_and_return(self, client_id, command_fn, *args, **kwargs): with self.client_id_context(client_id): try: self.log.debug(f"{self.name} run-and-return {fn_str}") + result = error_sentinel try: result = await command_fn(*args, **kwargs) except BaseException as e: - error = f"Error in {self.name}.{fn_str}: {e}" - self.log.debug(error) - trace = traceback.format_exc() - self.log.debug(trace) - result = {"_e": (error, trace)} + if not in_exception_chain(e, (KeyboardInterrupt, asyncio.CancelledError)): + error = f"Error in {self.name}.{fn_str}: {e}" + self.log.debug(error) + trace = traceback.format_exc() + self.log.debug(trace) + result = {"_e": (error, trace)} finally: self.tasks.pop(client_id, None) - self.log.debug(f"{self.name}: Sending response to {fn_str}: {result}") - await self.send_socket_multipart(client_id, result) + if result is not error_sentinel: + self.log.debug(f"{self.name}: Sending response to {fn_str}: {result}") + await self.send_socket_multipart(client_id, result) except BaseException as e: self.log.critical( f"Unhandled exception in {self.name}.run_and_return({client_id}, {command_fn}, {args}, {kwargs}): {e}" @@ -417,14 +428,16 @@ async def run_and_yield(self, client_id, command_fn, *args, **kwargs): self.log.debug(f"{self.name} run-and-yield {fn_str}") try: async for _ in command_fn(*args, **kwargs): + self.log.debug(f"{self.name}: sending iteration for {command_fn.__name__}(): {_}") await self.send_socket_multipart(client_id, _) except BaseException as e: - error = f"Error in {self.name}.{fn_str}: {e}" - trace = traceback.format_exc() - self.log.debug(error) - self.log.debug(trace) - result = {"_e": (error, trace)} - await self.send_socket_multipart(client_id, result) + if not in_exception_chain(e, (KeyboardInterrupt, asyncio.CancelledError)): + error = f"Error in {self.name}.{fn_str}: {e}" + trace = traceback.format_exc() + self.log.debug(error) + self.log.debug(trace) + result = {"_e": (error, trace)} + await self.send_socket_multipart(client_id, result) finally: self.log.debug(f"{self.name} reached end of run-and-yield iteration for {command_fn.__name__}()") # _s == special signal that means StopIteration @@ -537,9 +550,21 @@ def new_child_task(self, client_id, coro): self.child_tasks[client_id] = {task} return task - async def finished_tasks(self, client_id): + async def finished_tasks(self, client_id, timeout=None): child_tasks = self.child_tasks.get(client_id, set()) - done, pending = await asyncio.wait(child_tasks, return_when=asyncio.FIRST_COMPLETED) + try: + done, pending = await asyncio.wait(child_tasks, return_when=asyncio.FIRST_COMPLETED, timeout=timeout) + except BaseException as e: + if isinstance(e, (TimeoutError, asyncio.exceptions.TimeoutError)): + done = set() + self.log.warning(f"{self.name}: Timeout after {timeout:,} seconds in finished_tasks({child_tasks})") + for task in child_tasks: + task.cancel() + else: + if not in_exception_chain(e, (KeyboardInterrupt, asyncio.CancelledError)): + self.log.error(f"{self.name}: Unhandled exception in finished_tasks({child_tasks}): {e}") + self.log.trace(traceback.format_exc()) + raise self.child_tasks[client_id] = pending return done @@ -562,7 +587,7 @@ async def cancel_task(self, client_id): async def _cancel_task(self, task): try: await asyncio.wait_for(task, timeout=10) - except (TimeoutError, asyncio.TimeoutError): + except (TimeoutError, asyncio.exceptions.TimeoutError): self.log.debug(f"{self.name}: Timeout cancelling task") return except (KeyboardInterrupt, asyncio.CancelledError): diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index bcf41a37c..9a5a9b869 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -113,6 +113,9 @@ class BaseEvent: _data_validator = None # Whether to increment scope distance if the child and parent hosts are the same _scope_distance_increment_same_host = False + # Don't allow duplicates to occur within a parent chain + # In other words, don't emit the event if the same one already exists in its discovery context + _suppress_chain_dupes = False def __init__( self, @@ -169,6 +172,7 @@ def __init__( self._resolved_hosts = set() self.dns_children = dict() self._discovery_context = "" + self._discovery_context_regex = re.compile(r"\{(?:event|module)[^}]*\}") self.web_spider_distance = 0 # for creating one-off events without enforcing parent requirement @@ -339,10 +343,14 @@ def discovery_context(self): @discovery_context.setter def discovery_context(self, context): + def replace(match): + s = match.group() + return s.format(module=self.module, event=self) + try: - self._discovery_context = context.format(module=self.module, event=self) + self._discovery_context = self._discovery_context_regex.sub(replace, context) except Exception as e: - log.warning(f"Error formatting discovery context for {self}: {e} (context: '{context}')") + log.trace(f"Error formatting discovery context for {self}: {e} (context: '{context}')") self._discovery_context = context @property @@ -350,8 +358,10 @@ def discovery_path(self): """ This event's full discovery context, including those of all its parents """ - full_event_chain = list(reversed(self.get_parents())) + [self] - return [[e.id, e.discovery_context] for e in full_event_chain if e.type != "SCAN"] + parent_path = [] + if self.parent is not None and self != self.parent: + parent_path = self.parent.discovery_path + return parent_path + [[self.id, self.discovery_context]] @property def words(self): @@ -870,6 +880,10 @@ class SCAN(BaseEvent): def _data_human(self): return f"{self.data['name']} ({self.data['id']})" + @property + def discovery_path(self): + return [] + class FINISHED(BaseEvent): """ @@ -1165,6 +1179,7 @@ def pretty_string(self): class STORAGE_BUCKET(DictEvent, URL_UNVERIFIED): _always_emit = True + _suppress_chain_dupes = True class _data_validator(BaseModel): name: str @@ -1436,7 +1451,8 @@ class FILESYSTEM(DictPathEvent): class RAW_DNS_RECORD(DictHostEvent): - pass + # don't emit raw DNS records for affiliates + _always_emit_tags = ["target"] def make_event( diff --git a/bbot/core/helpers/async_helpers.py b/bbot/core/helpers/async_helpers.py index f19b0f343..123385135 100644 --- a/bbot/core/helpers/async_helpers.py +++ b/bbot/core/helpers/async_helpers.py @@ -2,9 +2,10 @@ import random import asyncio import logging +import functools from datetime import datetime -from cachetools import LRUCache from .misc import human_timedelta +from cachetools import keys, LRUCache from contextlib import asynccontextmanager log = logging.getLogger("bbot.core.helpers.async_helpers") @@ -33,7 +34,7 @@ class NamedLock: E.g. simultaneous DNS lookups on the same hostname """ - def __init__(self, max_size=1000): + def __init__(self, max_size=10000): self._cache = LRUCache(maxsize=max_size) @asynccontextmanager @@ -105,3 +106,24 @@ def async_to_sync_gen(async_gen): yield loop.run_until_complete(async_gen.__anext__()) except StopAsyncIteration: pass + + +def async_cachedmethod(cache, key=keys.hashkey): + def decorator(method): + async def wrapper(self, *args, **kwargs): + method_cache = cache(self) + k = key(*args, **kwargs) + try: + return method_cache[k] + except KeyError: + pass + ret = await method(self, *args, **kwargs) + try: + method_cache[k] = ret + except ValueError: + pass + return ret + + return functools.wraps(method)(wrapper) + + return decorator diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 7e347ed69..2f77ce081 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -2,10 +2,12 @@ import logging import dns.exception import dns.asyncresolver +from cachetools import LFUCache from radixtarget import RadixTarget from bbot.errors import DNSError from bbot.core.engine import EngineClient +from bbot.core.helpers.async_helpers import async_cachedmethod from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name from .engine import DNSEngine @@ -79,6 +81,9 @@ def __init__(self, parent_helper): # brute force helper self._brute = None + self._is_wildcard_cache = LFUCache(maxsize=1000) + self._is_wildcard_domain_cache = LFUCache(maxsize=1000) + async def resolve(self, query, **kwargs): return await self.run_and_return("resolve", query=query, **kwargs) @@ -111,6 +116,7 @@ def brute(self): self._brute = DNSBrute(self.parent_helper) return self._brute + @async_cachedmethod(lambda self: self._is_wildcard_cache) async def is_wildcard(self, query, ips=None, rdtype=None): """ Use this method to check whether a *host* is a wildcard entry @@ -156,6 +162,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None): return await self.run_and_return("is_wildcard", query=query, ips=ips, rdtype=rdtype) + @async_cachedmethod(lambda self: self._is_wildcard_domain_cache) async def is_wildcard_domain(self, domain, log_info=False): domain = self._wildcard_prevalidation(domain) if not domain: diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 91efca10d..6840d5506 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -119,8 +119,8 @@ async def resolve(self, query, **kwargs): for _, host in extract_targets(answer): results.add(host) except BaseException: - log.trace(f"Caught exception in resolve({query}, {kwargs}):") - log.trace(traceback.format_exc()) + self.log.trace(f"Caught exception in resolve({query}, {kwargs}):") + self.log.trace(traceback.format_exc()) raise self.debug(f"Results for {query} with kwargs={kwargs}: {results}") @@ -165,8 +165,8 @@ async def resolve_raw(self, query, **kwargs): else: return await self._resolve_hostname(query, rdtype=rdtype, **kwargs) except BaseException: - log.trace(f"Caught exception in resolve_raw({query}, {kwargs}):") - log.trace(traceback.format_exc()) + self.log.trace(f"Caught exception in resolve_raw({query}, {kwargs}):") + self.log.trace(traceback.format_exc()) raise async def _resolve_hostname(self, query, **kwargs): @@ -219,11 +219,11 @@ async def _resolve_hostname(self, query, **kwargs): if error_count >= self.abort_threshold: connectivity = await self._connectivity_check() if connectivity: - log.verbose( + self.log.verbose( f'Aborting query "{query}" because failed {rdtype} queries for "{parent}" ({error_count:,}) exceeded abort threshold ({self.abort_threshold:,})' ) if parent_hash not in self._dns_warnings: - log.verbose( + self.log.verbose( f'Aborting future {rdtype} queries to "{parent}" because error count ({error_count:,}) exceeded abort threshold ({self.abort_threshold:,})' ) self._dns_warnings.add(parent_hash) @@ -239,6 +239,7 @@ async def _resolve_hostname(self, query, **kwargs): dns.exception.Timeout, dns.resolver.LifetimeTimeout, TimeoutError, + asyncio.exceptions.TimeoutError, ) as e: try: self._errors[parent_hash] += 1 @@ -257,7 +258,7 @@ async def _resolve_hostname(self, query, **kwargs): self.debug(err_msg) self.debug(f"Retry (#{retry_num}) resolving {query} with kwargs={kwargs}") else: - log.verbose(err_msg) + self.log.verbose(err_msg) if results: self._last_dns_success = time.time() @@ -307,10 +308,11 @@ async def _resolve_ip(self, query, **kwargs): self._dns_cache[dns_cache_hash] = results break except ( + dns.resolver.NoNameservers, dns.exception.Timeout, dns.resolver.LifetimeTimeout, - dns.resolver.NoNameservers, TimeoutError, + asyncio.exceptions.TimeoutError, ) as e: errors.append(e) # don't retry if we get a SERVFAIL @@ -361,7 +363,7 @@ def new_task(query): while tasks: # While there are tasks pending # Wait for the first task to complete - finished = await self.finished_tasks(client_id) + finished = await self.finished_tasks(client_id, timeout=120) for task in finished: results = task.result() @@ -388,7 +390,7 @@ def new_task(query, rdtype): while tasks: # While there are tasks pending # Wait for the first task to complete - finished = await self.finished_tasks(client_id) + finished = await self.finished_tasks(client_id, timeout=120) for task in finished: answers, errors = task.result() @@ -423,13 +425,13 @@ async def _catch(self, callback, *args, **kwargs): except dns.resolver.NoNameservers: raise except (dns.exception.Timeout, dns.resolver.LifetimeTimeout, TimeoutError): - log.debug(f"DNS query with args={args}, kwargs={kwargs} timed out after {self.timeout} seconds") + self.log.debug(f"DNS query with args={args}, kwargs={kwargs} timed out after {self.timeout} seconds") raise except dns.exception.DNSException as e: self.debug(f"{e} (args={args}, kwargs={kwargs})") except Exception as e: - log.warning(f"Error in {callback.__qualname__}() with args={args}, kwargs={kwargs}: {e}") - log.trace(traceback.format_exc()) + self.log.warning(f"Error in {callback.__qualname__}() with args={args}, kwargs={kwargs}: {e}") + self.log.trace(traceback.format_exc()) return [] async def is_wildcard(self, query, ips=None, rdtype=None): @@ -531,7 +533,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None): base_query_rdtypes = set(query_baseline) wildcard_rdtypes_set = set([k for k, v in result.items() if v[0] is True]) if base_query_rdtypes and wildcard_rdtypes_set and base_query_rdtypes == wildcard_rdtypes_set: - log.debug( + self.log.debug( f"Breaking from wildcard detection for {query} at {host} because base query rdtypes ({base_query_rdtypes}) == wildcard rdtypes ({wildcard_rdtypes_set})" ) raise DNSWildcardBreak() @@ -582,7 +584,7 @@ async def is_wildcard_domain(self, domain, log_info=False): wildcard_domain_results[host] = self._wildcard_cache[host_hash] continue - log.verbose(f"Checking if {host} is a wildcard") + self.log.verbose(f"Checking if {host} is a wildcard") # determine if this is a wildcard domain @@ -612,12 +614,12 @@ async def is_wildcard_domain(self, domain, log_info=False): wildcard_domain_results.update({host: wildcard_results}) if is_wildcard: wildcard_rdtypes_str = ",".join(sorted([t.upper() for t, r in wildcard_results.items() if r])) - log_fn = log.verbose + log_fn = self.log.verbose if log_info: - log_fn = log.info + log_fn = self.log.info log_fn(f"Encountered domain with wildcard DNS ({wildcard_rdtypes_str}): {host}") else: - log.verbose(f"Finished checking {host}, it is not a wildcard") + self.log.verbose(f"Finished checking {host}, it is not a wildcard") return wildcard_domain_results @@ -653,14 +655,14 @@ async def _connectivity_check(self, interval=5): self._last_dns_success = time.time() return True if time.time() - self._last_connectivity_warning > interval: - log.warning(f"DNS queries are failing, please check your internet connection") + self.log.warning(f"DNS queries are failing, please check your internet connection") self._last_connectivity_warning = time.time() self._errors.clear() return False def debug(self, *args, **kwargs): if self._debug: - log.trace(*args, **kwargs) + self.log.trace(*args, **kwargs) @property def in_tests(self): diff --git a/bbot/core/helpers/web/engine.py b/bbot/core/helpers/web/engine.py index bc58057ed..30e037e6c 100644 --- a/bbot/core/helpers/web/engine.py +++ b/bbot/core/helpers/web/engine.py @@ -100,7 +100,7 @@ def new_task(): while tasks: # While there are tasks pending # Wait for the first task to complete - finished = await self.finished_tasks(client_id) + finished = await self.finished_tasks(client_id, timeout=120) for task in finished: response = task.result() diff --git a/bbot/modules/bucket_azure.py b/bbot/modules/bucket_azure.py index c89034ccb..032e409b4 100644 --- a/bbot/modules/bucket_azure.py +++ b/bbot/modules/bucket_azure.py @@ -30,3 +30,7 @@ def check_bucket_exists(self, bucket_name, response): status_code = getattr(response, "status_code", 0) existent_bucket = status_code != 0 return existent_bucket, set() + + def clean_bucket_url(self, url): + # only return root URL + return "/".join(url.split("/")[:3]) diff --git a/bbot/modules/deadly/dastardly.py b/bbot/modules/deadly/dastardly.py index 4476b99ab..04fb5313f 100644 --- a/bbot/modules/deadly/dastardly.py +++ b/bbot/modules/deadly/dastardly.py @@ -94,7 +94,9 @@ def parse_dastardly_xml(self, xml_file): for testsuite in et.iter("testsuite"): yield TestSuite(testsuite) except FileNotFoundError: - pass + self.debug(f"Could not find Dastardly XML file at {xml_file}") + except OSError as e: + self.verbose(f"Error opening Dastardly XML file at {xml_file}: {e}") except etree.ParseError as e: self.warning(f"Error parsing Dastardly XML at {xml_file}: {e}") diff --git a/bbot/modules/dnsbrute_mutations.py b/bbot/modules/dnsbrute_mutations.py index 2087ccb15..78513fc2d 100644 --- a/bbot/modules/dnsbrute_mutations.py +++ b/bbot/modules/dnsbrute_mutations.py @@ -106,7 +106,7 @@ def add_mutation(m): # skip if there's hardly any mutations if len(mutations) < 10: - self.debug( + self.verbose( f"Skipping {len(mutations):,} mutations against {domain} because there are less than 10" ) break diff --git a/bbot/modules/internal/cloudcheck.py b/bbot/modules/internal/cloudcheck.py index 15d9bf364..9b7b6e147 100644 --- a/bbot/modules/internal/cloudcheck.py +++ b/bbot/modules/internal/cloudcheck.py @@ -14,7 +14,9 @@ async def setup(self): def make_dummy_modules(self): self.dummy_modules = {} for provider_name, provider in self.helpers.cloud.providers.items(): - self.dummy_modules[provider_name] = self.scan._make_dummy_module(f"cloud_{provider_name}", _type="scan") + module = self.scan._make_dummy_module(f"cloud_{provider_name}", _type="scan") + module.default_discovery_context = "{module} derived {event.type}: {event.host}" + self.dummy_modules[provider_name] = module async def filter_event(self, event): if (not event.host) or (event.type in ("IP_RANGE",)): @@ -27,6 +29,7 @@ async def handle_event(self, event, **kwargs): self.make_dummy_modules() # cloud tagging by hosts hosts_to_check = set(str(s) for s in event.resolved_hosts) + # we use the original host, since storage buckets hostnames might be collapsed to _wildcard hosts_to_check.add(str(event.host_original)) for host in hosts_to_check: for provider, provider_type, subnet in self.helpers.cloudcheck(host): diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index fcf7e90af..0877c3aa7 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -1,6 +1,6 @@ import ipaddress from contextlib import suppress -from cachetools import LRUCache +from cachetools import LFUCache from bbot.errors import ValidationError from bbot.core.helpers.dns.engine import all_rdtypes @@ -10,6 +10,14 @@ class DNSResolve(InterceptModule): + """ + TODO: + - scrap event cache in favor of the parent backtracking method + - don't duplicate resolution on the same host + - clean up wildcard checking to only happen once, and re-emit/abort if one is detected + - same thing with main_host_event. we should never be processing two events - only one. + """ + watched_events = ["*"] _priority = 1 scope_distance_modifier = None @@ -36,7 +44,7 @@ async def setup(self): self._emit_raw_records = None # event resolution cache - self._event_cache = LRUCache(maxsize=10000) + self._event_cache = LFUCache(maxsize=10000) self._event_cache_locks = NamedLock() self.host_module = self.HostModule(self.scan) @@ -64,12 +72,7 @@ async def filter_event(self, event): return True async def handle_event(self, event, **kwargs): - dns_tags = set() - event_whitelisted = False - event_blacklisted = False - event_is_ip = self.helpers.is_ip(event.host) - event_host = str(event.host) event_host_hash = hash(event_host) @@ -92,92 +95,13 @@ async def handle_event(self, event, **kwargs): # dns_tags, dns_children, event_whitelisted, event_blacklisted = self._event_cache[event_host_hash] except KeyError: - main_host_event = self.get_dns_parent(event) + main_host_event, dns_tags, event_whitelisted, event_blacklisted, raw_record_events = ( + await self.resolve_event(event) + ) - rdtypes_to_resolve = () - if event_is_ip: - if not self.minimal: - rdtypes_to_resolve = ("PTR",) - else: - if self.minimal: - rdtypes_to_resolve = ("A", "AAAA", "CNAME") - else: - rdtypes_to_resolve = all_rdtypes - - # if missing from cache, do DNS resolution - queries = [(event_host, rdtype) for rdtype in rdtypes_to_resolve] - error_rdtypes = [] - raw_record_events = [] - async for (query, rdtype), (answer, errors) in self.helpers.dns.resolve_raw_batch(queries): - if self.emit_raw_records and rdtype not in ("A", "AAAA", "CNAME", "PTR"): - raw_record_event = self.make_event( - {"host": str(event_host), "type": rdtype, "answer": answer.to_text()}, - "RAW_DNS_RECORD", - parent=main_host_event, - tags=[f"{rdtype.lower()}-record"], - context=f"{rdtype} lookup on {{event.parent.host}} produced {{event.type}}", - ) - raw_record_events.append(raw_record_event) - if errors: - error_rdtypes.append(rdtype) - for _rdtype, host in extract_targets(answer): - dns_tags.add(f"{rdtype.lower()}-record") - try: - main_host_event.dns_children[_rdtype].add(host) - except KeyError: - main_host_event.dns_children[_rdtype] = {host} - - # if there were dns resolution errors, notify the user with tags - for rdtype in error_rdtypes: - if rdtype not in main_host_event.dns_children: - dns_tags.add(f"{rdtype.lower()}-error") - - # if there weren't any DNS children and it's not an IP address, tag as unresolved - if not main_host_event.dns_children and not event_is_ip: - dns_tags.add("unresolved") - - # check DNS children against whitelists and blacklists - for rdtype, children in main_host_event.dns_children.items(): - if event_blacklisted: - break - for host in children: - # whitelisting / blacklisting based on resolved hosts - if rdtype in ("A", "AAAA", "CNAME"): - # having a CNAME to an in-scope resource doesn't make you in-scope - if (not event_whitelisted) and rdtype != "CNAME": - with suppress(ValidationError): - if self.scan.whitelisted(host): - event_whitelisted = True - dns_tags.add(f"dns-whitelisted-{rdtype.lower()}") - # CNAME to a blacklisted resource, means you're blacklisted - with suppress(ValidationError): - if self.scan.blacklisted(host): - dns_tags.add("blacklisted") - dns_tags.add(f"dns-blacklisted-{rdtype.lower()}") - event_blacklisted = True - event_whitelisted = False - break - - # check for private IPs - try: - ip = ipaddress.ip_address(host) - if ip.is_private: - dns_tags.add("private-ip") - except ValueError: - continue - - # add DNS tags to main host - for tag in dns_tags: - main_host_event.add_tag(tag) - - # set resolved_hosts attribute - for rdtype, children in main_host_event.dns_children.items(): - if rdtype in ("A", "AAAA", "CNAME"): - for host in children: - main_host_event._resolved_hosts.add(host) - - # if we're not blacklisted, emit the main host event and all its raw records - if not event_blacklisted: + # if we're not blacklisted and we haven't already done it, emit the main host event and all its raw records + main_host_resolved = getattr(main_host_event, "_resolved", False) + if not event_blacklisted and not main_host_resolved: if event_whitelisted: self.debug( f"Making {main_host_event} in-scope because it resolves to an in-scope resource (A/AAAA)" @@ -202,8 +126,19 @@ async def handle_event(self, event, **kwargs): if not self.minimal: in_dns_scope = -1 < event.scope_distance < self._dns_search_distance for rdtype, records in main_host_event.dns_children.items(): - module = self.scan._make_dummy_module_dns(rdtype) + module = self._make_dummy_module(rdtype) for record in records: + parents = main_host_event.get_parents() + for e in parents: + e_is_host = e.type in ("DNS_NAME", "IP_ADDRESS") + e_parent_matches = str(e.parent.host) == str(main_host_event.host) + e_host_matches = str(e.data) == str(record) + e_module_matches = str(e.module) == str(module) + if e_is_host and e_parent_matches and e_host_matches and e_module_matches: + self.trace( + f"TRYING TO EMIT ALREADY-EMITTED {record}:{rdtype} CHILD OF {main_host_event}, parents: {parents}" + ) + return try: child_event = self.scan.make_event( record, "DNS_NAME", module=module, parent=main_host_event @@ -220,6 +155,9 @@ async def handle_event(self, event, **kwargs): f'Event validation failed for DNS child of {main_host_event}: "{record}" ({rdtype}): {e}' ) + # mark the host as resolved + main_host_event._resolved = True + # store results in cache self._event_cache[event_host_hash] = main_host_event, dns_tags, event_whitelisted, event_blacklisted @@ -240,6 +178,99 @@ async def handle_event(self, event, **kwargs): if event.type == "DNS_NAME" and "unresolved" in event.tags: event.type = "DNS_NAME_UNRESOLVED" + async def resolve_event(self, event): + dns_tags = set() + event_whitelisted = False + event_blacklisted = False + + main_host_event = self.get_dns_parent(event) + event_host = str(event.host) + event_is_ip = self.helpers.is_ip(event.host) + + rdtypes_to_resolve = () + if event_is_ip: + if not self.minimal: + rdtypes_to_resolve = ("PTR",) + else: + if self.minimal: + rdtypes_to_resolve = ("A", "AAAA", "CNAME") + else: + rdtypes_to_resolve = all_rdtypes + + # if missing from cache, do DNS resolution + queries = [(event_host, rdtype) for rdtype in rdtypes_to_resolve] + error_rdtypes = [] + raw_record_events = [] + async for (query, rdtype), (answer, errors) in self.helpers.dns.resolve_raw_batch(queries): + if self.emit_raw_records and rdtype not in ("A", "AAAA", "CNAME", "PTR"): + raw_record_event = self.make_event( + {"host": str(event_host), "type": rdtype, "answer": answer.to_text()}, + "RAW_DNS_RECORD", + parent=main_host_event, + tags=[f"{rdtype.lower()}-record"], + context=f"{rdtype} lookup on {{event.parent.host}} produced {{event.type}}", + ) + raw_record_events.append(raw_record_event) + if errors: + error_rdtypes.append(rdtype) + dns_tags.add(f"{rdtype.lower()}-record") + for _rdtype, host in extract_targets(answer): + try: + main_host_event.dns_children[_rdtype].add(host) + except KeyError: + main_host_event.dns_children[_rdtype] = {host} + + # if there were dns resolution errors, notify the user with tags + for rdtype in error_rdtypes: + if rdtype not in main_host_event.dns_children: + dns_tags.add(f"{rdtype.lower()}-error") + + # if there weren't any DNS children and it's not an IP address, tag as unresolved + if not main_host_event.dns_children and not event_is_ip: + dns_tags.add("unresolved") + + # check DNS children against whitelists and blacklists + for rdtype, children in main_host_event.dns_children.items(): + if event_blacklisted: + break + for host in children: + # whitelisting / blacklisting based on resolved hosts + if rdtype in ("A", "AAAA", "CNAME"): + # having a CNAME to an in-scope resource doesn't make you in-scope + if (not event_whitelisted) and rdtype != "CNAME": + with suppress(ValidationError): + if self.scan.whitelisted(host): + event_whitelisted = True + dns_tags.add(f"dns-whitelisted-{rdtype.lower()}") + # CNAME to a blacklisted resource, means you're blacklisted + with suppress(ValidationError): + if self.scan.blacklisted(host): + dns_tags.add("blacklisted") + dns_tags.add(f"dns-blacklisted-{rdtype.lower()}") + event_blacklisted = True + event_whitelisted = False + break + + # check for private IPs + try: + ip = ipaddress.ip_address(host) + if ip.is_private: + dns_tags.add("private-ip") + except ValueError: + continue + + # add DNS tags to main host + for tag in dns_tags: + main_host_event.add_tag(tag) + + # set resolved_hosts attribute + for rdtype, children in main_host_event.dns_children.items(): + if rdtype in ("A", "AAAA", "CNAME"): + for host in children: + main_host_event._resolved_hosts.add(host) + + return main_host_event, dns_tags, event_whitelisted, event_blacklisted, raw_record_events + async def handle_wildcard_event(self, event): self.debug(f"Entering handle_wildcard_event({event})") try: @@ -299,3 +330,24 @@ def get_dns_parent(self, event): context="{event.parent.type} has host {event.type}: {event.host}", tags=tags, ) + + def _make_dummy_module(self, name): + try: + dummy_module = self.scan.dummy_modules[name] + except KeyError: + dummy_module = self.scan._make_dummy_module(name=name, _type="DNS") + dummy_module._priority = 4 + dummy_module.suppress_dupes = False + self.scan.dummy_modules[name] = dummy_module + return dummy_module + + def _dns_child_dedup_hash(self, parent_host, host, rdtype): + # we deduplicate NS records by their parent domain + # because otherwise every DNS_NAME has one, and it gets super messy + if rdtype == "NS": + _, parent_domain = self.helpers.split_domain(parent_host) + return hash(f"{parent_domain}:{host}") + return hash(f"{parent_host}:{host}:{rdtype}") + + def _main_outgoing_dedup_hash(self, event): + return hash(f"{event.host}") diff --git a/bbot/modules/templates/bucket.py b/bbot/modules/templates/bucket.py index f5a10387f..3b7bde789 100644 --- a/bbot/modules/templates/bucket.py +++ b/bbot/modules/templates/bucket.py @@ -53,7 +53,7 @@ async def handle_dns_name(self, event): bucket_name = d.join(split) buckets.add(bucket_name) async for bucket_name, url, tags, num_buckets in self.brute_buckets(buckets, permutations=self.permutations): - await self.emit_event( + await self.emit_storage_bucket( {"name": bucket_name, "url": url}, "STORAGE_BUCKET", parent=event, @@ -79,7 +79,7 @@ async def handle_storage_bucket(self, event): async for bucket_name, new_url, tags, num_buckets in self.brute_buckets( [bucket_name], permutations=self.permutations, omit_base=True ): - await self.emit_event( + await self.emit_storage_bucket( {"name": bucket_name, "url": new_url}, "STORAGE_BUCKET", parent=event, @@ -87,6 +87,17 @@ async def handle_storage_bucket(self, event): context=f"{{module}} tried {num_buckets:,} variations of {url} and found {{event.type}} at {new_url}", ) + async def emit_storage_bucket(self, event_data, event_type, parent, tags, context): + event_data["url"] = self.clean_bucket_url(event_data["url"]) + self.hugewarning(event_data) + await self.emit_event( + event_data, + event_type, + parent=parent, + tags=tags, + context=context, + ) + async def brute_buckets(self, buckets, permutations=False, omit_base=False): buckets = set(buckets) new_buckets = set(buckets) @@ -112,6 +123,10 @@ async def brute_buckets(self, buckets, permutations=False, omit_base=False): if existent_bucket: yield bucket_name, url, tags, num_buckets + def clean_bucket_url(self, url): + # if needed, modify the bucket url before emitting it + return url + def build_bucket_request(self, bucket_name, base_domain, region): url = self.build_url(bucket_name, base_domain, region) return url, {} diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index cdae044a8..70658e69d 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -224,6 +224,12 @@ async def handle_event(self, event, **kwargs): self.debug(f"Re-queuing internal event {parent} with parent {event} to prevent graph orphan") await self.emit_event(parent) + if event._suppress_chain_dupes: + for parent in event.get_parents(): + if parent == event: + return False, f"an identical parent {event} was found, and _suppress_chain_dupes=True" + + # custom callback - abort event emission it returns true abort_result = False if callable(abort_if): async with self.scan._acatch(context=abort_if): diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index d2d55ad39..9e8242ea9 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -97,6 +97,7 @@ def __init__( config=None, module_dirs=None, include=None, + presets=None, output_dir=None, scan_name=None, name=None, @@ -125,6 +126,7 @@ def __init__( module_dirs (list[str], optional): additional directories to load modules from. config (dict, optional): Additional scan configuration settings. include (list[str], optional): names or filenames of other presets to include. + presets (list[str], optional): an alias for `include`. output_dir (str or Path, optional): Directory to store scan output. Defaults to BBOT home directory (`~/.bbot`). scan_name (str, optional): Human-readable name of the scan. If not specified, it will be random, e.g. "demonic_jimmy". name (str, optional): Human-readable name of the preset. Used mainly for logging. @@ -240,6 +242,13 @@ def __init__( self._target = None + # "presets" is alias to "include" + if presets and include: + raise ValueError( + 'Cannot use both "presets" and "include" args at the same time (presets is only an alias to include). Please pick only one :)' + ) + if presets and not include: + include = presets # include other presets if include and not isinstance(include, (list, tuple, set)): include = [include] diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index fa2abfadf..4e1d5a104 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -1208,16 +1208,6 @@ def _make_dummy_module(self, name, _type="scan"): self.dummy_modules[name] = dummy return dummy - def _make_dummy_module_dns(self, name): - try: - dummy_module = self.dummy_modules[name] - except KeyError: - dummy_module = self._make_dummy_module(name=name, _type="DNS") - dummy_module.suppress_dupes = False - dummy_module._priority = 4 - self.dummy_modules[name] = dummy_module - return dummy_module - from bbot.modules.base import BaseModule diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index b2bcb68fe..5f98f4939 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -241,16 +241,20 @@ async def test_wildcards(bbot_scanner): assert wildcard_event3.data == "github.io" # dns resolve distance - event_distance_0 = scan.make_event("8.8.8.8", module=scan._make_dummy_module_dns("PTR"), parent=scan.root_event) + event_distance_0 = scan.make_event( + "8.8.8.8", module=scan.modules["dnsresolve"]._make_dummy_module("PTR"), parent=scan.root_event + ) assert event_distance_0.dns_resolve_distance == 0 event_distance_1 = scan.make_event( - "evilcorp.com", module=scan._make_dummy_module_dns("A"), parent=event_distance_0 + "evilcorp.com", module=scan.modules["dnsresolve"]._make_dummy_module("A"), parent=event_distance_0 ) assert event_distance_1.dns_resolve_distance == 1 - event_distance_2 = scan.make_event("1.2.3.4", module=scan._make_dummy_module_dns("PTR"), parent=event_distance_1) + event_distance_2 = scan.make_event( + "1.2.3.4", module=scan.modules["dnsresolve"]._make_dummy_module("PTR"), parent=event_distance_1 + ) assert event_distance_2.dns_resolve_distance == 1 event_distance_3 = scan.make_event( - "evilcorp.org", module=scan._make_dummy_module_dns("A"), parent=event_distance_2 + "evilcorp.org", module=scan.modules["dnsresolve"]._make_dummy_module("A"), parent=event_distance_2 ) assert event_distance_3.dns_resolve_distance == 2 diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 4f42c1bb0..c319559d4 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -587,9 +587,23 @@ async def handle_event(self, event): scan.modules["dummy_module"] = dummy_module + # test discovery context test_event = dummy_module.make_event("evilcorp.com", "DNS_NAME", parent=scan.root_event) assert test_event.discovery_context == "dummy_module discovered DNS_NAME: evilcorp.com" + test_event2 = dummy_module.make_event( + "evilcorp.com", "DNS_NAME", parent=scan.root_event, context="{module} {found} {event.host}" + ) + assert test_event2.discovery_context == "dummy_module {found} evilcorp.com" + # jank input + test_event3 = dummy_module.make_event( + "http://evilcorp.com/{http://evilcorp.org!@#%@#$:,,,}", "URL_UNVERIFIED", parent=scan.root_event + ) + assert ( + test_event3.discovery_context + == "dummy_module discovered URL_UNVERIFIED: http://evilcorp.com/{http:/evilcorp.org!@" + ) + events = [e async for e in scan.async_start()] assert len(events) == 6 diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index 89c523df0..768ee3429 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -725,6 +725,7 @@ def test_preset_include(): """ ) + # with include= preset = Preset(include=[str(custom_preset_dir_1 / "preset1")]) assert preset.config.modules.testpreset1.test == "asdf" assert preset.config.modules.testpreset2.test == "fdsa" @@ -732,6 +733,18 @@ def test_preset_include(): assert preset.config.modules.testpreset4.test == "zxcv" assert preset.config.modules.testpreset5.test == "hjkl" + # same thing but with presets= (an alias to include) + preset = Preset(presets=[str(custom_preset_dir_1 / "preset1")]) + assert preset.config.modules.testpreset1.test == "asdf" + assert preset.config.modules.testpreset2.test == "fdsa" + assert preset.config.modules.testpreset3.test == "qwerty" + assert preset.config.modules.testpreset4.test == "zxcv" + assert preset.config.modules.testpreset5.test == "hjkl" + + # can't use both include= and presets= at the same time + with pytest.raises(ValueError): + preset = Preset(presets=["subdomain-enum"], include=["dirbust-light"]) + @pytest.mark.asyncio async def test_preset_conditions(): diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py index 678593ed1..1c2b0bb51 100644 --- a/bbot/test/test_step_1/test_python_api.py +++ b/bbot/test/test_step_1/test_python_api.py @@ -56,6 +56,10 @@ async def test_python_api(): events = [e async for e in custom_target_scan.async_start()] assert 1 == len([e for e in events if e.type == "ORG_STUB" and e.data == "evilcorp" and "target" in e.tags]) + # presets + scan6 = Scanner("evilcorp.com", presets=["subdomain-enum"]) + assert "sslcert" in scan6.preset.modules + def test_python_api_sync(): from bbot.scanner import Scanner diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_azure.py b/bbot/test/test_step_2/module_tests/test_module_bucket_azure.py index 782a71c32..a3c866c08 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bucket_azure.py +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_azure.py @@ -1,4 +1,5 @@ from .test_module_bucket_amazon import * +from .base import ModuleTestBase class TestBucket_Azure(Bucket_Amazon_Base): @@ -11,3 +12,45 @@ def url_setup(self): self.url_1 = f"https://{self.random_bucket_1}" self.url_2 = f"https://{self.random_bucket_2}" self.url_3 = f"https://{self.random_bucket_3}/{random_bucket_name_3}?restype=container" + + +class TestBucket_Azure_NoDup(ModuleTestBase): + targets = ["tesla.com"] + module_name = "bucket_azure" + config_overrides = {"cloudcheck": True} + + async def setup_before_prep(self, module_test): + module_test.httpx_mock.add_response( + url=f"https://tesla.blob.core.windows.net/tesla?restype=container", + text="", + ) + await module_test.mock_dns( + { + "tesla.com": {"A": ["1.2.3.4"]}, + "tesla.blob.core.windows.net": {"A": ["1.2.3.4"]}, + } + ) + + def check(self, module_test, events): + assert 1 == len([e for e in events if e.type == "STORAGE_BUCKET"]) + bucket_event = [e for e in events if e.type == "STORAGE_BUCKET"][0] + assert bucket_event.data["name"] == "tesla" + assert bucket_event.data["url"] == "https://tesla.blob.core.windows.net/" + assert ( + bucket_event.discovery_context + == f"bucket_azure tried bucket variations of {event.data} and found {{event.type}} at {url}" + ) + + +class TestBucket_Azure_NoDup(TestBucket_Azure_NoDup): + """ + This tests _suppress_chain_dupes functionality to make sure it works as expected + """ + + async def setup_after_prep(self, module_test): + from bbot.core.event.base import STORAGE_BUCKET + + module_test.monkeypatch.setattr(STORAGE_BUCKET, "_suppress_chain_dupes", False) + + def check(self, module_test, events): + assert 2 == len([e for e in events if e.type == "STORAGE_BUCKET"]) diff --git a/docs/dev/index.md b/docs/dev/index.md index 8a29e48a7..6315637f0 100644 --- a/docs/dev/index.md +++ b/docs/dev/index.md @@ -4,6 +4,18 @@ BBOT exposes a Python API that allows you to create, start, and stop scans. Documented in this section are commonly-used classes and functions within BBOT, along with usage examples. +## Adding BBOT to Your Python Project + +If you are using Poetry, you can add BBOT to your python environment like this: + +```bash +# stable +poetry add bbot + +# bleeding-edge (dev branch) +poetry add bbot --allow-prereleases +``` + ## Running a BBOT Scan from Python #### Synchronous