diff --git a/bbot/cli.py b/bbot/cli.py
index 9a8ab51dd..275d156ec 100755
--- a/bbot/cli.py
+++ b/bbot/cli.py
@@ -6,9 +6,9 @@
 import asyncio
 import logging
 import traceback
-from aioconsole import ainput
 from omegaconf import OmegaConf
 from contextlib import suppress
+from aioconsole import stream
 
 # fix tee buffering
 sys.stdout.reconfigure(line_buffering=True)
@@ -20,6 +20,7 @@
 from bbot import __version__
 from bbot.modules import module_loader
 from bbot.core.configurator.args import parser
+from bbot.core.helpers.misc import smart_decode
 from bbot.core.helpers.logger import log_to_stderr
 from bbot.core.configurator import ensure_config_files, check_cli_args, environ
 
@@ -301,46 +302,56 @@ async def _main():
         if not options.dry_run:
             log.trace(f"Command: {' '.join(sys.argv)}")
 
-            if not options.agent_mode and not options.yes and sys.stdin.isatty():
-                log.hugesuccess(f"Scan ready. Press enter to execute {scanner.name}")
-                input()
-
-            def handle_keyboard_input(keyboard_input):
-                kill_regex = re.compile(r"kill (?P<module>[a-z0-9_]+)")
-                if keyboard_input:
-                    log.verbose(f'Got keyboard input: "{keyboard_input}"')
-                    kill_match = kill_regex.match(keyboard_input)
-                    if kill_match:
-                        module = kill_match.group("module")
-                        if module in scanner.modules:
-                            log.hugewarning(f'Killing module: "{module}"')
-                            scanner.manager.kill_module(module, message="killed by user")
-                        else:
-                            log.warning(f'Invalid module: "{module}"')
-                else:
-                    toggle_log_level(logger=log)
-                    scanner.manager.modules_status(_log=True)
-
-            async def akeyboard_listen():
-                allowed_errors = 10
-                while 1:
-                    keyboard_input = "a"
+            if sys.stdin.isatty():
+                if not options.agent_mode and not options.yes:
+                    log.hugesuccess(f"Scan ready. Press enter to execute {scanner.name}")
+                    input()
+
+                def handle_keyboard_input(keyboard_input):
+                    kill_regex = re.compile(r"kill (?P<module>[a-z0-9_]+)")
+                    if keyboard_input:
+                        log.verbose(f'Got keyboard input: "{keyboard_input}"')
+                        kill_match = kill_regex.match(keyboard_input)
+                        if kill_match:
+                            module = kill_match.group("module")
+                            if module in scanner.modules:
+                                log.hugewarning(f'Killing module: "{module}"')
+                                scanner.manager.kill_module(module, message="killed by user")
+                            else:
+                                log.warning(f'Invalid module: "{module}"')
+                    else:
+                        toggle_log_level(logger=log)
+                        scanner.manager.modules_status(_log=True)
+
+                # Reader
+                reader = stream.StandardStreamReader()
+                protocol = stream.StandardStreamReaderProtocol(reader)
+                await asyncio.get_event_loop().connect_read_pipe(lambda: protocol, sys.stdin)
+
+                async def akeyboard_listen():
                     try:
-                        keyboard_input = await ainput()
-                    except Exception:
-                        allowed_errors -= 1
-                    handle_keyboard_input(keyboard_input)
-                    if allowed_errors <= 0:
-                        break
-
-            try:
-                keyboard_listen_task = asyncio.create_task(akeyboard_listen())
-
-                await scanner.async_start_without_generator()
-            finally:
-                keyboard_listen_task.cancel()
-                with suppress(asyncio.CancelledError):
-                    await keyboard_listen_task
+                        allowed_errors = 10
+                        while 1:
+                            keyboard_input = None
+                            try:
+                                keyboard_input = smart_decode((await reader.readline()).strip())
+                                allowed_errors = 10
+                            except Exception as e:
+                                log_to_stderr(f"Error in keyboard listen loop: {e}", level="TRACE")
+                                log_to_stderr(traceback.format_exc(), level="TRACE")
+                                allowed_errors -= 1
+                            if keyboard_input is not None:
+                                handle_keyboard_input(keyboard_input)
+                            if allowed_errors <= 0:
+                                break
+                    except Exception as e:
+                        log_to_stderr(f"Error in keyboard listen task: {e}", level="ERROR")
+                        log_to_stderr(traceback.format_exc(), level="TRACE")
+
+                asyncio.create_task(akeyboard_listen())
+
+            await scanner.async_start_without_generator()
 
     except bbot.core.errors.ScanError as e:
         log_to_stderr(str(e), level="ERROR")
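The new listener reads stdin through an asyncio stream instead of `aioconsole.ainput()`, and the error budget now resets after every successful line. A minimal standalone sketch of the same wiring (not part of the patch; `StandardStreamReader`/`StandardStreamReaderProtocol` are the same `aioconsole` classes used above):

```python
import sys
import asyncio

from aioconsole import stream


async def main():
    # Wire an asyncio StreamReader to stdin, as cli.py does above.
    reader = stream.StandardStreamReader()
    protocol = stream.StandardStreamReaderProtocol(reader)
    await asyncio.get_event_loop().connect_read_pipe(lambda: protocol, sys.stdin)
    while True:
        line = await reader.readline()  # bytes; b"" means EOF
        if not line:
            break
        print(f"got: {line.decode(errors='ignore').strip()}")


asyncio.run(main())
```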
diff --git a/bbot/core/helpers/async_helpers.py b/bbot/core/helpers/async_helpers.py
index 4cc701161..8434ccb0f 100644
--- a/bbot/core/helpers/async_helpers.py
+++ b/bbot/core/helpers/async_helpers.py
@@ -5,13 +5,12 @@
 import threading
 from datetime import datetime
 from queue import Queue, Empty
+from cachetools import LRUCache
 from .misc import human_timedelta
 from contextlib import asynccontextmanager
 
 log = logging.getLogger("bbot.core.helpers.async_helpers")
 
-from .cache import CacheDict
-
 
 class ShuffleQueue(asyncio.Queue):
     def _put(self, item):
@@ -32,20 +31,20 @@ class NamedLock:
     """
    Returns a unique asyncio.Lock() based on a provided string
 
-    Useful for preventing multiple operations from occuring on the same data in parallel
+    Useful for preventing multiple operations from occurring on the same data in parallel
     E.g. simultaneous DNS lookups on the same hostname
     """
 
     def __init__(self, max_size=1000):
-        self._cache = CacheDict(max_size=max_size)
+        self._cache = LRUCache(maxsize=max_size)
 
     @asynccontextmanager
     async def lock(self, name):
         try:
-            lock = self._cache.get(name)
+            lock = self._cache[name]
         except KeyError:
             lock = _Lock(name)
-            self._cache.put(name, lock)
+            self._cache[name] = lock
 
         async with lock:
             yield
diff --git a/bbot/core/helpers/cache.py b/bbot/core/helpers/cache.py
index 3eb54daf7..3a70fbd24 100644
--- a/bbot/core/helpers/cache.py
+++ b/bbot/core/helpers/cache.py
@@ -1,8 +1,6 @@
 import os
 import time
 import logging
-from contextlib import suppress
-from collections import OrderedDict
 
 from .misc import sha1
 
@@ -53,84 +51,3 @@ def is_cached(self, key, cache_hrs=24 * 7):
 
 def cache_filename(self, key):
     return self.cache_dir / sha1(key).hexdigest()
-
-
-_sentinel = object()
-
-
-class CacheDict:
-    """
-    Dictionary to store cached values, with a maximum size limit
-    """
-
-    def __init__(self, max_size=1000):
-        self._cache = OrderedDict()
-        self._max_size = int(max_size)
-
-    def get(self, name, fallback=_sentinel):
-        name_hash = self._hash(name)
-        try:
-            return self._cache[name_hash]
-        except KeyError:
-            if fallback is not _sentinel:
-                return fallback
-            raise
-        finally:
-            with suppress(KeyError):
-                self._cache.move_to_end(name_hash)
-            self._truncate()
-
-    def put(self, name, value):
-        name_hash = self._hash(name)
-        try:
-            self._cache[name_hash] = value
-        finally:
-            with suppress(KeyError):
-                self._cache.move_to_end(name_hash)
-            self._truncate()
-
-    def _truncate(self):
-        if not self or len(self) <= self._max_size:
-            return
-        for nh in list(self._cache.keys()):
-            try:
-                del self._cache[nh]
-            except KeyError:
-                pass
-            if not self or len(self) <= self._max_size:
-                break
-
-    def keys(self):
-        return self._cache.keys()
-
-    def values(self):
-        return self._cache.values()
-
-    def items(self):
-        return self._cache.items()
-
-    def clear(self):
-        return self._cache.clear()
-
-    def _hash(self, v):
-        if type(v) == int:
-            return v
-        return hash(str(v))
-
-    def __contains__(self, item):
-        return self._hash(item) in self._cache
-
-    def __iter__(self):
-        return iter(self._cache)
-
-    def __getitem__(self, item):
-        return self.get(item)
-
-    def __setitem__(self, item, value):
-        self.put(item, value)
-
-    def __bool__(self):
-        return bool(self._cache)
-
-    def __len__(self):
-        return len(self._cache)
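`cachetools.LRUCache` is a dict-style replacement for the removed `CacheDict`: inserting beyond `maxsize` evicts the least-recently-used entry, and a lookup miss raises `KeyError`, which is exactly what the `self._cache[name]` access in `NamedLock.lock()` relies on. One behavioral difference: `CacheDict` hashed keys before storing them, while `LRUCache` stores keys as-is. A quick illustration (not from the patch):

```python
from cachetools import LRUCache

cache = LRUCache(maxsize=3)
for i in range(5):
    cache[f"key{i}"] = i

print(len(cache))        # 3 -- the two least-recently-used entries were evicted
print("key0" in cache)   # False (evicted)
print("key4" in cache)   # True
_ = cache["key2"]        # a lookup marks the entry as recently used
cache["key5"] = 5        # so this evicts key3, not key2
print("key2" in cache)   # True
```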
diff --git a/bbot/core/helpers/depsinstaller/installer.py b/bbot/core/helpers/depsinstaller/installer.py
index 00662b969..049baef86 100644
--- a/bbot/core/helpers/depsinstaller/installer.py
+++ b/bbot/core/helpers/depsinstaller/installer.py
@@ -157,9 +157,9 @@ async def pip_install(self, packages, constraints=None):
 
         command = [sys.executable, "-m", "pip", "install", "--upgrade"] + packages
         if constraints:
-            contraints_tempfile = self.parent_helper.tempfile(constraints, pipe=False)
+            constraints_tempfile = self.parent_helper.tempfile(constraints, pipe=False)
             command.append("--constraint")
-            command.append(contraints_tempfile)
+            command.append(constraints_tempfile)
 
         process = None
         try:
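For reference, pip's `--constraint` flag takes a file of version pins that apply only when the listed packages are actually pulled in as dependencies; the helper above writes the constraint strings to a temp file and appends its path to the command. A rough standalone equivalent of the same pattern (hypothetical function, using the stdlib instead of BBOT's `tempfile` helper):

```python
import sys
import tempfile
import subprocess


def pip_install(packages, constraints=None):
    command = [sys.executable, "-m", "pip", "install", "--upgrade"] + list(packages)
    if constraints:
        # constraints pin versions without forcing installation of the pinned packages
        with tempfile.NamedTemporaryFile("w", suffix=".txt", delete=False) as f:
            f.write("\n".join(constraints))
        command += ["--constraint", f.name]
    return subprocess.run(command, check=True)


# pip_install(["requests"], constraints=["urllib3<2"])
```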
diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py
index b81194403..ecfc71b94 100644
--- a/bbot/core/helpers/dns.py
+++ b/bbot/core/helpers/dns.py
@@ -7,13 +7,14 @@
 import contextlib
 import dns.exception
 import dns.asyncresolver
+from cachetools import LRUCache
 from contextlib import suppress
 
 from .regexes import dns_name_regex
 from bbot.core.helpers.ratelimiter import RateLimiter
 from bbot.core.helpers.async_helpers import NamedLock
 from bbot.core.errors import ValidationError, DNSError, DNSWildcardBreak
-from .misc import is_ip, is_domain, is_dns_name, domain_parents, parent_domain, rand_string, cloudcheck, as_completed
+from .misc import is_ip, is_domain, is_dns_name, domain_parents, parent_domain, rand_string, cloudcheck
 
 log = logging.getLogger("bbot.core.helpers.dns")
 
@@ -64,8 +65,8 @@ class DNSHelper:
         wildcard_ignore (tuple): Domains to be ignored during wildcard detection.
         wildcard_tests (int): Number of tests to be run for wildcard detection. Defaults to 5.
         _wildcard_cache (dict): Cache for wildcard detection results.
-        _dns_cache (CacheDict): Cache for DNS resolution results, limited in size.
-        _event_cache (CacheDict): Cache for event resolution results, tags. Limited in size.
+        _dns_cache (LRUCache): Cache for DNS resolution results, limited in size.
+        _event_cache (LRUCache): Cache for event resolution results, tags. Limited in size.
         resolver_file (Path): File containing system's current resolver nameservers.
         filter_bad_ptrs (bool): Whether to filter out DNS names that appear to be auto-generated PTR records. Defaults to True.
@@ -130,8 +131,8 @@ def __init__(self, parent_helper):
         self.fallback_nameservers_file = self.parent_helper.wordlist_dir / "nameservers.txt"
         self._debug = self.parent_helper.config.get("dns_debug", False)
         self._dummy_modules = dict()
-        self._dns_cache = self.parent_helper.CacheDict(max_size=100000)
-        self._event_cache = self.parent_helper.CacheDict(max_size=10000)
+        self._dns_cache = LRUCache(maxsize=10000)
+        self._event_cache = LRUCache(maxsize=10000)
         self._event_cache_locks = NamedLock()
 
         # for mocking DNS queries
@@ -530,9 +531,8 @@ async def resolve_event(self, event, minimal=False):
                 types = ("A", "AAAA")
 
         if types:
-            tasks = [self.resolve_raw(event_host, type=t, use_cache=True) for t in types]
-            async for task in as_completed(tasks):
-                resolved_raw, errors = await task
+            for t in types:
+                resolved_raw, errors = await self.resolve_raw(event_host, type=t, use_cache=True)
                 for rdtype, e in errors:
                     if rdtype not in resolved_raw:
                         event_tags.add(f"{rdtype.lower()}-error")
@@ -631,24 +631,13 @@ def event_cache_get(self, host):
         except KeyError:
             return set(), None, None, set()
 
-    async def _resolve_batch_coro_wrapper(self, q, **kwargs):
-        """
-        Helps us correlate task results back to their original arguments
-        """
-        result = await self.resolve(q, **kwargs)
-        return (q, result)
-
     async def resolve_batch(self, queries, **kwargs):
         """
-        Asynchronously resolves a batch of queries in parallel and yields the results as they are completed.
-
-        This method wraps around `_resolve_batch_coro_wrapper` to resolve a list of queries in parallel.
-        It batches the queries to a manageable size and executes them asynchronously, respecting
-        global rate limits.
+        A helper to execute a bunch of DNS requests.
 
         Args:
             queries (list): List of queries to resolve.
-            **kwargs: Additional keyword arguments to pass to `_resolve_batch_coro_wrapper`.
+            **kwargs: Additional keyword arguments to pass to `resolve()`.
 
         Yields:
             tuple: A tuple containing the original query and its resolved value.
@@ -662,13 +651,8 @@ async def resolve_batch(self, queries, **kwargs):
             ('evilcorp.com', {'2.2.2.2'})
 
         """
-        queries = list(queries)
-        batch_size = 250
-        for i in range(0, len(queries), batch_size):
-            batch = queries[i : i + batch_size]
-            tasks = [asyncio.create_task(self._resolve_batch_coro_wrapper(q, **kwargs)) for q in batch]
-            async for task in as_completed(tasks):
-                yield await task
+        for q in queries:
+            yield (q, await self.resolve(q, **kwargs))
 
     def extract_targets(self, record):
         """
@@ -841,14 +825,11 @@ async def is_wildcard(self, query, ips=None, rdtype=None):
         # if the caller hasn't already done the work of resolving the IPs
         if ips is None:
             # then resolve the query for all rdtypes
-            base_query_tasks = {
-                t: asyncio.create_task(self.resolve_raw(query, type=t, use_cache=True)) for t in rdtypes_to_check
-            }
-            for _rdtype, task in base_query_tasks.items():
-                raw_results, errors = await task
+            for t in rdtypes_to_check:
+                raw_results, errors = await self.resolve_raw(query, type=t, use_cache=True)
                 if errors and not raw_results:
-                    self.debug(f"Failed to resolve {query} ({_rdtype}) during wildcard detection")
-                    result[_rdtype] = (None, parent)
+                    self.debug(f"Failed to resolve {query} ({t}) during wildcard detection")
+                    result[t] = (None, parent)
                     continue
                 for __rdtype, answers in raw_results:
                     base_query_results = set()
@@ -872,12 +853,13 @@ async def is_wildcard(self, query, ips=None, rdtype=None):
         # for every parent domain, starting with the shortest
         try:
             for host in parents[::-1]:
+                # make sure we've checked that domain for wildcards
+                await self.is_wildcard_domain(host)
+
                 # for every rdtype
                 for _rdtype in list(base_query_ips):
                     # get the IPs from above
                     query_ips = base_query_ips.get(_rdtype, set())
-                    # make sure we've checked that domain for wildcards
-                    await self.is_wildcard_domain(host)
                     host_hash = hash(host)
 
                     if host_hash in self._wildcard_cache:
@@ -953,24 +935,20 @@ async def is_wildcard_domain(self, domain, log_info=False):
                 wildcard_domain_results[host] = self._wildcard_cache[host_hash]
                 continue
 
+            log.verbose(f"Checking if {host} is a wildcard")
+
             # determine if this is a wildcard domain
-            wildcard_tasks = {t: [] for t in rdtypes_to_check}
             # resolve a bunch of random subdomains of the same parent
-            for rdtype in rdtypes_to_check:
+            is_wildcard = False
+            wildcard_results = dict()
+            for rdtype in list(rdtypes_to_check):
                 # continue if a wildcard was already found for this rdtype
                 # if rdtype in self._wildcard_cache[host_hash]:
                 #     continue
                 for _ in range(self.wildcard_tests):
                     rand_query = f"{rand_string(digits=False, length=10)}.{host}"
-                    wildcard_task = asyncio.create_task(self.resolve(rand_query, type=rdtype, use_cache=False))
-                    wildcard_tasks[rdtype].append(wildcard_task)
-
-            # combine the random results
-            is_wildcard = False
-            wildcard_results = dict()
-            for rdtype, tasks in wildcard_tasks.items():
-                async for task in as_completed(tasks):
-                    results = await task
+                    results = await self.resolve(rand_query, type=rdtype, use_cache=False)
                     if results:
                         is_wildcard = True
                         if not rdtype in wildcard_results:
@@ -989,6 +967,8 @@ async def is_wildcard_domain(self, domain, log_info=False):
                     if log_info:
                         log_fn = log.info
                     log_fn(f"Encountered domain with wildcard DNS ({wildcard_rdtypes_str}): {host}")
+                else:
+                    log.verbose(f"Finished checking {host}, it is not a wildcard")
 
         return wildcard_domain_results
 
@@ -1035,7 +1015,7 @@ def _parse_rdtype(self, t, default=None):
 
     def debug(self, *args, **kwargs):
         if self._debug:
-            log.debug(*args, **kwargs)
+            log.trace(*args, **kwargs)
 
     def _get_dummy_module(self, name):
         try:
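The wildcard logic above now probes each parent domain once, up front, and sequentially: it resolves a handful of random (effectively guaranteed-nonexistent) subdomains, and if they answer, the domain has wildcard DNS. A bare-bones sketch of that probe using `dnspython` directly (BBOT's real implementation adds per-host caching, multiple record types, and canary checks):

```python
import random
import string

import dns.asyncresolver


async def has_wildcard_dns(domain, rdtype="A", tests=5):
    resolver = dns.asyncresolver.Resolver()
    for _ in range(tests):
        # a random 10-character label is vanishingly unlikely to exist legitimately
        label = "".join(random.choice(string.ascii_lowercase) for _ in range(10))
        try:
            answers = await resolver.resolve(f"{label}.{domain}", rdtype)
        except Exception:
            continue  # NXDOMAIN etc. -- not a wildcard answer
        if answers:
            return True
    return False
```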
diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py
index 36c6346c9..899f3ab0b 100644
--- a/bbot/core/helpers/helper.py
+++ b/bbot/core/helpers/helper.py
@@ -48,8 +48,8 @@ class ConfigAwareHelper:
     from . import regexes
     from . import validators
     from .files import tempfile, feed_pipe, _feed_pipe, tempfile_tail
+    from .cache import cache_get, cache_put, cache_filename, is_cached
     from .command import run, run_live, _spawn_proc, _prepare_command_kwargs
-    from .cache import cache_get, cache_put, cache_filename, is_cached, CacheDict
 
     def __init__(self, config, scan=None):
         self.config = config
diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py
index 283bc3782..a67e89402 100644
--- a/bbot/core/helpers/misc.py
+++ b/bbot/core/helpers/misc.py
@@ -66,9 +66,14 @@ def is_domain(d):
         - Port, if present in input, is ignored.
     """
     d, _ = split_host_port(d)
+    if is_ip(d):
+        return False
     extracted = tldextract(d)
-    if extracted.domain and not extracted.subdomain:
-        return True
+    if extracted.registered_domain:
+        if not extracted.subdomain:
+            return True
+    else:
+        return d.count(".") == 1
     return False
 
@@ -96,9 +101,14 @@ def is_subdomain(d):
         - Port, if present in input, is ignored.
     """
     d, _ = split_host_port(d)
+    if is_ip(d):
+        return False
     extracted = tldextract(d)
-    if extracted.domain and extracted.subdomain:
-        return True
+    if extracted.registered_domain:
+        if extracted.subdomain:
+            return True
+    else:
+        return d.count(".") > 1
     return False
 
@@ -327,6 +337,23 @@ def domain_parents(d, include_self=False):
             break
 
 
+def subdomain_depth(d):
+    """
+    Calculate the depth of subdomains within a given domain name.
+
+    Args:
+        d (str): The domain name to analyze.
+
+    Returns:
+        int: The depth of the subdomain. For example, a hostname "5.4.3.2.1.evilcorp.com"
+        has a subdomain depth of 5.
+    """
+    subdomain, domain = split_domain(d)
+    if not subdomain:
+        return 0
+    return subdomain.count(".") + 1
+
+
 def parent_url(u):
     """
     Retrieve the parent URL of a given URL.
@@ -601,9 +628,6 @@ def is_ip(d, version=None):
         >>> is_ip('evilcorp.com')
         False
     """
-    if isinstance(d, (ipaddress.IPv4Address, ipaddress.IPv6Address)):
-        if version is None or version == d.version:
-            return True
     try:
         ip = ipaddress.ip_address(d)
         if version is None or ip.version == version:
@@ -1427,7 +1451,7 @@ def search_dict_values(d, *regexes):
         ...     ]
         ... }
         ... }
-        >>> url_regexes = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+')
+        >>> url_regexes = re.compile(r'https?://[^\\s<>"]+|www\.[^\\s<>"]+')
         >>> list(search_dict_values(dict_to_search, url_regexes))
         ["https://www.evilcorp.com"]
         """
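The net effect of the `is_domain()`/`is_subdomain()` changes is that IP addresses are rejected outright and hosts with an unrecognized TLD fall back to simple dot-counting, while the new `subdomain_depth()` gives the brute-force logic in `massdns` (below) a cheap depth limit. Expected behavior, assuming the helpers are imported from `bbot.core.helpers.misc`:

```python
from bbot.core.helpers.misc import is_domain, is_subdomain, subdomain_depth

assert is_domain("evilcorp.co.uk")          # registered domain, no subdomain
assert not is_domain("www.evilcorp.co.uk")  # has a subdomain
assert not is_domain("8.8.8.8")             # IPs are now explicitly rejected
assert is_subdomain("www.evilcorp.co.uk")

assert subdomain_depth("evilcorp.com") == 0
assert subdomain_depth("www.evilcorp.com") == 1
assert subdomain_depth("5.4.3.2.1.evilcorp.com") == 5  # matches the docstring
```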
diff --git a/bbot/core/helpers/ntlm.py b/bbot/core/helpers/ntlm.py
index e4d9cd1ca..8605ef34a 100644
--- a/bbot/core/helpers/ntlm.py
+++ b/bbot/core/helpers/ntlm.py
@@ -38,7 +38,7 @@ def __init__(self, pos_tup, raw):
 
 def decode_ntlm_challenge(st):
     hdr_tup = struct.unpack("
diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py
--- a/bbot/modules/massdns.py
+++ b/bbot/modules/massdns.py
@@ ... @@ async def filter_event(self, event):
+        subdomain_depth = self.helpers.subdomain_depth(query) + 1
+        if subdomain_depth > self.max_depth:
+            eligible = False
+            reason = f"subdomain depth of *.{query} ({subdomain_depth}) > max_depth ({self.max_depth})"
+
+        # don't brute-force things that look like autogenerated PTRs
+        if self.helpers.is_ptr(query):
+            eligible = False
+            reason = f'"{query}" looks like an autogenerated PTR'
+
         if eligible:
             self.add_found(event)
 
         # reject if already processed
         if self.already_processed(query):
             return False, f'Query "{query}" was already processed'
+
         if eligible:
             self.processed.add(hash(query))
 
         return True, reason
@@ -116,23 +136,18 @@ async def filter_event(self, event):
     async def handle_event(self, event):
         query = self.make_query(event)
         self.source_events.add_target(event)
-
         self.info(f"Brute-forcing subdomains for {query} (source: {event.data})")
-        for hostname in await self.massdns(query, self.subdomain_list):
-            await self.emit_result(hostname, event, query)
+        results = await self.massdns(query, self.subdomain_list)
+        await self.resolve_and_emit_queue.put((results, event, None))
 
     def abort_if(self, event):
         if not event.scope_distance == 0:
             return True, "event is not in scope"
         if "wildcard" in event.tags:
             return True, "event is a wildcard"
-
-    async def emit_result(self, result, source_event, query, tags=None):
-        if not result == source_event:
-            kwargs = {"abort_if": self.abort_if}
-            if tags is not None:
-                kwargs["tags"] = tags
-            await self.emit_event(result, "DNS_NAME", source_event, **kwargs)
+        if "unresolved" in event.tags:
+            return True, "event is unresolved"
+        return False, ""
 
     def already_processed(self, hostname):
         if hash(hostname) in self.processed:
@@ -143,7 +158,7 @@ async def massdns(self, domain, subdomains):
         subdomains = list(subdomains)
 
         domain_wildcard_rdtypes = set()
-        for domain, rdtypes in (await self.helpers.is_wildcard_domain(domain)).items():
+        for _domain, rdtypes in (await self.helpers.is_wildcard_domain(domain)).items():
             for rdtype, results in rdtypes.items():
                 if results:
                     domain_wildcard_rdtypes.add(rdtype)
@@ -204,12 +219,36 @@
         )
 
         # everything checks out
-        self.verbose(f"Resolving batch of {len(results):,} results")
-        resolved = dict([l async for l in self.helpers.resolve_batch(results, type=("A", "CNAME"))])
-        resolved = {k: v for k, v in resolved.items() if v}
-        for hostname in resolved:
-            self.add_found(hostname)
-        return list(resolved)
+        return results
+
+    async def resolve_and_emit(self):
+        """
+        When results are found, they are placed into self.resolve_and_emit_queue.
+        The purpose of this function (which is started as a task in the module's setup()) is to consume results from
+        the queue, resolve them, and if they resolve, emit them.
+
+        This exists to prevent disrupting the scan with huge batches of DNS resolutions.
+        """
+        while 1:
+            results, source_event, tags = await self.resolve_and_emit_queue.get()
+            self.verbose(f"Resolving batch of {len(results):,} results")
+            async with self._task_counter.count(f"{self.name}.resolve_and_emit()"):
+                async for hostname, r in self.helpers.resolve_batch(results, type=("A", "CNAME")):
+                    if not r:
+                        self.debug(f"Discarding {hostname} because it didn't resolve")
+                        continue
+                    self.add_found(hostname)
+                    if source_event is None:
+                        source_event = self.source_events.get(hostname)
+                        if source_event is None:
+                            self.warning(f"Could not correlate source event from: {hostname}")
+                            source_event = self.scan.root_event
+                    kwargs = {"abort_if": self.abort_if, "tags": tags}
+                    await self.emit_event(hostname, "DNS_NAME", source_event, **kwargs)
+
+    @property
+    def running(self):
+        return super().running or self.resolve_and_emit_queue.qsize() > 0
 
     async def _canary_check(self, domain, num_checks=50):
         random_subdomains = list(self.gen_random_subdomains(num_checks))
@@ -339,6 +378,9 @@ def add_mutation(_domain_hash, m):
                         self.mutations_tried.add(h)
                         mutations.add(m)
 
+                num_base_mutations = len(base_mutations)
+                self.debug(f"Base mutations for {domain}: {num_base_mutations:,}")
+
                 # try every subdomain everywhere else
                 for _domain, _subdomains in found:
                     if _domain == domain:
@@ -346,10 +388,7 @@ def add_mutation(_domain_hash, m):
                     for s in _subdomains:
                         first_segment = s.split(".")[0]
                         # skip stuff with lots of numbers (e.g. PTRs)
-                        digits = self.digit_regex.findall(first_segment)
-                        excessive_digits = len(digits) > 2
-                        long_digits = any(len(d) > 3 for d in digits)
-                        if excessive_digits or long_digits:
+                        if self.has_excessive_digits(first_segment):
                             continue
                         add_mutation(domain_hash, first_segment)
                         for word in self.helpers.extract_words(
@@ -357,6 +396,9 @@ def add_mutation(_domain_hash, m):
                         ):
                             add_mutation(domain_hash, word)
 
+                num_massdns_mutations = len(mutations) - num_base_mutations
+                self.debug(f"Mutations from previous subdomains for {domain}: {num_massdns_mutations:,}")
+
                 # numbers + devops mutations
                 for mutation in self.helpers.word_cloud.mutations(
                     subdomains, cloud=False, numbers=3, number_padding=1
@@ -365,24 +407,26 @@ def add_mutation(_domain_hash, m):
                     ):
                         m = delimiter.join(mutation).lower()
                         add_mutation(domain_hash, m)
 
+                num_word_cloud_mutations = len(mutations) - num_massdns_mutations
+                self.debug(f"Mutations added by word cloud for {domain}: {num_word_cloud_mutations:,}")
+
                 # special dns mutator
+                self.debug(
+                    f"DNS Mutator size: {len(self.helpers.word_cloud.dns_mutator):,} (limited to {self.max_mutations:,})"
+                )
                 for subdomain in self.helpers.word_cloud.dns_mutator.mutations(
                     subdomains, max_mutations=self.max_mutations
                 ):
                     add_mutation(domain_hash, subdomain)
 
+                num_mutations = len(mutations) - num_word_cloud_mutations
+                self.debug(f"Mutations added by DNS Mutator: {num_mutations:,}")
+
                 if mutations:
                     self.info(f"Trying {len(mutations):,} mutations against {domain} ({i+1}/{len(found)})")
                     results = list(await self.massdns(query, mutations))
-                    for hostname in results:
-                        source_event = self.source_events.get(hostname)
-                        if source_event is None:
-                            self.warning(f"Could not correlate source event from: {hostname}")
-                            source_event = self.scan.root_event
-                        await self.emit_result(
-                            hostname, source_event, query, tags=[f"mutation-{self._mutation_run}"]
-                        )
                     if results:
+                        await self.resolve_and_emit_queue.put((results, None, [f"mutation-{self.mutation_run}"]))
                         found_mutations = True
                     continue
                 break
@@ -390,7 +434,7 @@ def add_mutation(_domain_hash, m):
                 self.warning(e)
 
         if found_mutations:
-            self._mutation_run += 1
+            self.mutation_run += 1
 
     def add_found(self, host):
         if not isinstance(host, str):
@@ -422,3 +466,16 @@ def gen_random_subdomains(self, n=50):
                 yield subdomain
         for _ in range(5):
             yield self.helpers.rand_string(length=8, digits=False)
+
+    def has_excessive_digits(self, d):
+        """
+        Identifies dns names with excessive numbers, e.g.:
+        - w1-2-3.evilcorp.com
+        - ptr1234.evilcorp.com
+        """
+        digits = self.digit_regex.findall(d)
+        excessive_digits = len(digits) > 2
+        long_digits = any(len(d) > 3 for d in digits)
+        if excessive_digits or long_digits:
+            return True
+        return False
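The refactor turns `massdns` into a producer/consumer pair: `handle_event()` just enqueues raw massdns output, and the long-lived `resolve_and_emit()` task drains the queue at its own pace (the `running` override keeps the module alive while the queue is non-empty, so the scan can't finish with work still buffered). A toy model of that handoff, with hypothetical names:

```python
import asyncio


async def producer(queue):
    # handle_event() analog: hand off raw results instead of resolving inline
    for batch in (["a.evilcorp.com"], ["b.evilcorp.com", "c.evilcorp.com"]):
        await queue.put(batch)
    await queue.put(None)  # sentinel: no more batches


async def consumer(queue):
    # resolve_and_emit() analog: drain batches without blocking the producer
    while True:
        batch = await queue.get()
        if batch is None:
            break
        for hostname in batch:
            print(f"resolved + emitted: {hostname}")  # stand-in for resolve_batch()


async def main():
    queue = asyncio.Queue()
    await asyncio.gather(producer(queue), consumer(queue))


asyncio.run(main())
```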
diff --git a/bbot/modules/output/emails.py b/bbot/modules/output/emails.py
index 029bc5aca..e96c5d97c 100644
--- a/bbot/modules/output/emails.py
+++ b/bbot/modules/output/emails.py
@@ -12,14 +12,19 @@ class Emails(Human):
 
     output_filename = "emails.txt"
 
+    async def setup(self):
+        self.emails_written = 0
+        return await super().setup()
+
     def _scope_distance_check(self, event):
         return BaseModule._scope_distance_check(self, event)
 
     async def handle_event(self, event):
         if self.file is not None:
+            self.emails_written += 1
             self.file.write(f"{event.data}\n")
             self.file.flush()
 
     async def report(self):
         if getattr(self, "_file", None) is not None:
-            self.info(f"Saved email addresses to {self.output_file}")
+            self.info(f"Saved {self.emails_written:,} email addresses to {self.output_file}")
diff --git a/bbot/modules/output/subdomains.py b/bbot/modules/output/subdomains.py
index 49dea2db8..bfb7174ac 100644
--- a/bbot/modules/output/subdomains.py
+++ b/bbot/modules/output/subdomains.py
@@ -15,6 +15,7 @@ class Subdomains(Human):
 
     async def setup(self):
         self.include_unresolved = self.config.get("include_unresolved", False)
+        self.subdomains_written = 0
         return await super().setup()
 
     async def filter_event(self, event):
@@ -27,9 +28,10 @@ def _scope_distance_check(self, event):
 
     async def handle_event(self, event):
         if self.file is not None:
+            self.subdomains_written += 1
             self.file.write(f"{event.data}\n")
             self.file.flush()
 
     async def report(self):
         if getattr(self, "_file", None) is not None:
-            self.info(f"Saved subdomains to {self.output_file}")
+            self.info(f"Saved {self.subdomains_written:,} subdomains to {self.output_file}")
diff --git a/bbot/modules/sitedossier.py b/bbot/modules/sitedossier.py
index 0c797296a..86872c052 100644
--- a/bbot/modules/sitedossier.py
+++ b/bbot/modules/sitedossier.py
@@ -43,5 +43,5 @@ async def query(self, query, parse_fn=None, request_fn=None):
                     results.add(hostname)
                     yield hostname
             if '
diff --git a/poetry.lock b/poetry.lock
--- a/poetry.lock
+++ b/poetry.lock
 d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"]
 jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
 uvloop = ["uvloop (>=0.15.2)"]
 
+[[package]]
+name = "cachetools"
+version = "5.3.2"
+description = "Extensible memoizing collections and decorators"
+optional = false
+python-versions = ">=3.7"
+files = [
+    {file = "cachetools-5.3.2-py3-none-any.whl", hash = "sha256:861f35a13a451f94e301ce2bec7cac63e881232ccce7ed67fab9b5df4d3beaa1"},
+    {file = "cachetools-5.3.2.tar.gz", hash = "sha256:086ee420196f7b2ab9ca2db2520aca326318b68fe5ba8bc4d49cca91add450f2"},
+]
+
 [[package]]
 name = "certifi"
 version = "2024.2.2"
@@ -2413,4 +2424,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.9"
-content-hash = "8d9864610f54050aec62bf75415e5b683a851323d054a38ff36e54d9d5c284e3"
+content-hash = "4eb296ea314405bf39920f67d20eebb13cc8974254fd1643538bcb3a338976d2"
diff --git a/pyproject.toml b/pyproject.toml
index 93172c060..7d1b2fb32 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,7 @@
 pydantic = "^2.4.2"
 httpx = "^0.26.0"
 cloudcheck = "^2.1.0.181"
 tldextract = "^5.1.1"
+cachetools = "^5.3.2"
 
 [tool.poetry.group.dev.dependencies]
 flake8 = "^6.0.0"