diff --git a/README.md b/README.md index 6b0cca3be..17ed4226a 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ ### A Recursive Internet Scanner for Hackers. -[![Python Version](https://img.shields.io/badge/python-3.9+-FF8400)](https://www.python.org) [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![License](https://img.shields.io/badge/license-GPLv3-FF8400.svg)](https://github.com/blacklanternsecurity/bbot/blob/dev/LICENSE) [![DEF CON Demo Labs 2023](https://img.shields.io/badge/DEF%20CON%20Demo%20Labs-2023-FF8400.svg)](https://forum.defcon.org/node/246338) [![Tests](https://github.com/blacklanternsecurity/bbot/actions/workflows/tests.yml/badge.svg?branch=stable)](https://github.com/blacklanternsecurity/bbot/actions?query=workflow%3A"tests") [![Codecov](https://codecov.io/gh/blacklanternsecurity/bbot/branch/dev/graph/badge.svg?token=IR5AZBDM5K)](https://codecov.io/gh/blacklanternsecurity/bbot) [![Pypi Downloads](https://img.shields.io/pypi/dm/bbot)](https://pypistats.org/packages/bbot) [![Discord](https://img.shields.io/discord/859164869970362439)](https://discord.com/invite/PZqkgxu5SA) +[![Python Version](https://img.shields.io/badge/python-3.9+-FF8400)](https://www.python.org) [![License](https://img.shields.io/badge/license-GPLv3-FF8400.svg)](https://github.com/blacklanternsecurity/bbot/blob/dev/LICENSE) [![DEF CON Demo Labs 2023](https://img.shields.io/badge/DEF%20CON%20Demo%20Labs-2023-FF8400.svg)](https://forum.defcon.org/node/246338) [![PyPi Downloads](https://static.pepy.tech/personalized-badge/bbot?right_color=orange&left_color=grey)](https://pepy.tech/project/bbot) [![Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![Tests](https://github.com/blacklanternsecurity/bbot/actions/workflows/tests.yml/badge.svg?branch=stable)](https://github.com/blacklanternsecurity/bbot/actions?query=workflow%3A"tests") [![Codecov](https://codecov.io/gh/blacklanternsecurity/bbot/branch/dev/graph/badge.svg?token=IR5AZBDM5K)](https://codecov.io/gh/blacklanternsecurity/bbot) [![Discord](https://img.shields.io/discord/859164869970362439)](https://discord.com/invite/PZqkgxu5SA) BBOT (Bighuge BLS OSINT Tool) is a recursive internet scanner inspired by [Spiderfoot](https://github.com/smicallef/spiderfoot), but designed to be faster, more reliable, and friendlier to pentesters, bug bounty hunters, and developers. @@ -62,7 +62,7 @@ git clone https://github.com/blacklanternsecurity/bbot && cd bbot
-Usage +Example Usage ## Example Commands @@ -114,7 +114,13 @@ bbot -t evilcorp.com -f subdomain-enum email-enum cloud-enum web-basic -m nmap g ## Targets -BBOT accepts an unlimited number of targets via `-t`. You can specify targets either directly on the command line or in files (or both!). Targets can be any of the following: +BBOT accepts an unlimited number of targets via `-t`. You can specify targets either directly on the command line or in files (or both!): + +```bash +bbot -t evilcorp.com evilcorp.org 1.2.3.0/24 -f subdomain-enum +``` + +Targets can be any of the following: - `DNS_NAME` (`evilcorp.com`) - `IP_ADDRESS` (`1.2.3.4`) @@ -280,4 +286,26 @@ For a full list of modules, including the data types consumed and emitted by eac | subdomain-hijack | 1 | Detects hijackable subdomains | subdomain_hijack | | web-screenshots | 1 | Takes screenshots of web pages | gowitness | -
+ +## BBOT Output Modules +BBOT can save its data to TXT, CSV, JSON, and tons of other destinations including [Neo4j](https://www.blacklanternsecurity.com/bbot/scanning/output/#neo4j), [Splunk](https://www.blacklanternsecurity.com/bbot/scanning/output/#splunk), and [Discord](https://www.blacklanternsecurity.com/bbot/scanning/output/#discord-slack-teams). For instructions on how to use these, see [Output Modules](https://www.blacklanternsecurity.com/bbot/scanning/output). + + +| Module | Type | Needs API Key | Description | Flags | Consumed Events | Produced Events | +|-----------------|--------|-----------------|-----------------------------------------------------------------------------------------|----------------|--------------------------------------------------------------------------------------------------|---------------------------| +| asset_inventory | output | No | Merge hosts, open ports, technologies, findings, etc. into a single asset inventory CSV | | DNS_NAME, FINDING, HTTP_RESPONSE, IP_ADDRESS, OPEN_TCP_PORT, TECHNOLOGY, URL, VULNERABILITY, WAF | IP_ADDRESS, OPEN_TCP_PORT | +| csv | output | No | Output to CSV | | * | | +| discord | output | No | Message a Discord channel when certain events are encountered | | * | | +| emails | output | No | Output any email addresses found belonging to the target domain | email-enum | EMAIL_ADDRESS | | +| http | output | No | Send every event to a custom URL via a web request | | * | | +| human | output | No | Output to text | | * | | +| json | output | No | Output to Newline-Delimited JSON (NDJSON) | | * | | +| neo4j | output | No | Output to Neo4j | | * | | +| python | output | No | Output via Python API | | * | | +| slack | output | No | Message a Slack channel when certain events are encountered | | * | | +| splunk | output | No | Send every event to a splunk instance through HTTP Event Collector | | * | | +| subdomains | output | No | Output only resolved, in-scope subdomains | subdomain-enum | DNS_NAME, DNS_NAME_UNRESOLVED | | +| teams | output | No | Message a Teams channel when certain events are encountered | | * | | +| web_report | output | No | Create a markdown report with web assets | | FINDING, TECHNOLOGY, URL, VHOST, VULNERABILITY | | +| websocket | output | No | Output to websockets | | * | | + diff --git a/bbot/cli.py b/bbot/cli.py index 9a8ab51dd..275d156ec 100755 --- a/bbot/cli.py +++ b/bbot/cli.py @@ -6,9 +6,9 @@ import asyncio import logging import traceback -from aioconsole import ainput from omegaconf import OmegaConf from contextlib import suppress +from aioconsole import stream # fix tee buffering sys.stdout.reconfigure(line_buffering=True) @@ -20,6 +20,7 @@ from bbot import __version__ from bbot.modules import module_loader from bbot.core.configurator.args import parser +from bbot.core.helpers.misc import smart_decode from bbot.core.helpers.logger import log_to_stderr from bbot.core.configurator import ensure_config_files, check_cli_args, environ @@ -301,46 +302,56 @@ async def _main(): if not options.dry_run: log.trace(f"Command: {' '.join(sys.argv)}") - if not options.agent_mode and not options.yes and sys.stdin.isatty(): - log.hugesuccess(f"Scan ready. 
Press enter to execute {scanner.name}") - input() - - def handle_keyboard_input(keyboard_input): - kill_regex = re.compile(r"kill (?P[a-z0-9_]+)") - if keyboard_input: - log.verbose(f'Got keyboard input: "{keyboard_input}"') - kill_match = kill_regex.match(keyboard_input) - if kill_match: - module = kill_match.group("module") - if module in scanner.modules: - log.hugewarning(f'Killing module: "{module}"') - scanner.manager.kill_module(module, message="killed by user") - else: - log.warning(f'Invalid module: "{module}"') - else: - toggle_log_level(logger=log) - scanner.manager.modules_status(_log=True) - async def akeyboard_listen(): - allowed_errors = 10 - while 1: - keyboard_input = "a" + if sys.stdin.isatty(): + if not options.agent_mode and not options.yes: + log.hugesuccess(f"Scan ready. Press enter to execute {scanner.name}") + input() + + def handle_keyboard_input(keyboard_input): + kill_regex = re.compile(r"kill (?P[a-z0-9_]+)") + if keyboard_input: + log.verbose(f'Got keyboard input: "{keyboard_input}"') + kill_match = kill_regex.match(keyboard_input) + if kill_match: + module = kill_match.group("module") + if module in scanner.modules: + log.hugewarning(f'Killing module: "{module}"') + scanner.manager.kill_module(module, message="killed by user") + else: + log.warning(f'Invalid module: "{module}"') + else: + toggle_log_level(logger=log) + scanner.manager.modules_status(_log=True) + + # Reader + reader = stream.StandardStreamReader() + protocol = stream.StandardStreamReaderProtocol(reader) + await asyncio.get_event_loop().connect_read_pipe(lambda: protocol, sys.stdin) + + async def akeyboard_listen(): try: - keyboard_input = await ainput() - except Exception: - allowed_errors -= 1 - handle_keyboard_input(keyboard_input) - if allowed_errors <= 0: - break - - try: - keyboard_listen_task = asyncio.create_task(akeyboard_listen()) - - await scanner.async_start_without_generator() - finally: - keyboard_listen_task.cancel() - with suppress(asyncio.CancelledError): - await keyboard_listen_task + allowed_errors = 10 + while 1: + keyboard_input = None + try: + keyboard_input = smart_decode((await reader.readline()).strip()) + allowed_errors = 10 + except Exception as e: + log_to_stderr(f"Error in keyboard listen loop: {e}", level="TRACE") + log_to_stderr(traceback.format_exc(), level="TRACE") + allowed_errors -= 1 + if keyboard_input is not None: + handle_keyboard_input(keyboard_input) + if allowed_errors <= 0: + break + except Exception as e: + log_to_stderr(f"Error in keyboard listen task: {e}", level="ERROR") + log_to_stderr(traceback.format_exc(), level="TRACE") + + asyncio.create_task(akeyboard_listen()) + + await scanner.async_start_without_generator() except bbot.core.errors.ScanError as e: log_to_stderr(str(e), level="ERROR") diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 24606f890..0e63b6291 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -565,7 +565,7 @@ def __contains__(self, other): return host_in_host(other.host, self.host) return False - def json(self, mode="json"): + def json(self, mode="json", siem_friendly=False): """ Serializes the event object to a JSON-compatible dictionary. @@ -574,6 +574,7 @@ def json(self, mode="json"): Parameters: mode (str): Specifies the data serialization mode. Default is "json". Other options include "graph", "human", and "id". + siem_friendly (bool): Whether to format the JSON in a way that's friendly to SIEM ingestion by Elastic, Splunk, etc. 
This ensures the value of "data" is always the same type (a dictionary). Returns: dict: JSON-serializable dictionary representation of the event object. @@ -585,9 +586,13 @@ def json(self, mode="json"): j.update({i: v}) data_attr = getattr(self, f"data_{mode}", None) if data_attr is not None: - j["data"] = data_attr + data = data_attr else: - j["data"] = smart_decode(self.data) + data = smart_decode(self.data) + if siem_friendly: + j["data"] = {self.type: data} + else: + j["data"] = data web_spider_distance = getattr(self, "web_spider_distance", None) if web_spider_distance is not None: j["web_spider_distance"] = web_spider_distance @@ -1312,7 +1317,7 @@ def make_event( ) -def event_from_json(j): +def event_from_json(j, siem_friendly=False): """ Creates an event object from a JSON dictionary. @@ -1335,14 +1340,19 @@ def event_from_json(j): if required keys are missing. Make sure to validate the JSON input beforehand. """ try: + event_type = j["type"] kwargs = { - "data": j["data"], - "event_type": j["type"], + "event_type": event_type, "scans": j.get("scans", []), "tags": j.get("tags", []), "confidence": j.get("confidence", 5), "dummy": True, } + if siem_friendly: + data = j["data"][event_type] + else: + data = j["data"] + kwargs["data"] = data event = make_event(**kwargs) resolved_hosts = j.get("resolved_hosts", []) diff --git a/bbot/core/helpers/async_helpers.py b/bbot/core/helpers/async_helpers.py index 4cc701161..8434ccb0f 100644 --- a/bbot/core/helpers/async_helpers.py +++ b/bbot/core/helpers/async_helpers.py @@ -5,13 +5,12 @@ import threading from datetime import datetime from queue import Queue, Empty +from cachetools import LRUCache from .misc import human_timedelta from contextlib import asynccontextmanager log = logging.getLogger("bbot.core.helpers.async_helpers") -from .cache import CacheDict - class ShuffleQueue(asyncio.Queue): def _put(self, item): @@ -32,20 +31,20 @@ class NamedLock: """ Returns a unique asyncio.Lock() based on a provided string - Useful for preventing multiple operations from occuring on the same data in parallel + Useful for preventing multiple operations from occurring on the same data in parallel E.g. 
simultaneous DNS lookups on the same hostname """ def __init__(self, max_size=1000): - self._cache = CacheDict(max_size=max_size) + self._cache = LRUCache(maxsize=max_size) @asynccontextmanager async def lock(self, name): try: - lock = self._cache.get(name) + lock = self._cache[name] except KeyError: lock = _Lock(name) - self._cache.put(name, lock) + self._cache[name] = lock async with lock: yield diff --git a/bbot/core/helpers/cache.py b/bbot/core/helpers/cache.py index 3eb54daf7..3a70fbd24 100644 --- a/bbot/core/helpers/cache.py +++ b/bbot/core/helpers/cache.py @@ -1,8 +1,6 @@ import os import time import logging -from contextlib import suppress -from collections import OrderedDict from .misc import sha1 @@ -53,84 +51,3 @@ def is_cached(self, key, cache_hrs=24 * 7): def cache_filename(self, key): return self.cache_dir / sha1(key).hexdigest() - - -_sentinel = object() - - -class CacheDict: - """ - Dictionary to store cached values, with a maximum size limit - """ - - def __init__(self, max_size=1000): - self._cache = OrderedDict() - self._max_size = int(max_size) - - def get(self, name, fallback=_sentinel): - name_hash = self._hash(name) - try: - return self._cache[name_hash] - except KeyError: - if fallback is not _sentinel: - return fallback - raise - finally: - with suppress(KeyError): - self._cache.move_to_end(name_hash) - self._truncate() - - def put(self, name, value): - name_hash = self._hash(name) - try: - self._cache[name_hash] = value - finally: - with suppress(KeyError): - self._cache.move_to_end(name_hash) - self._truncate() - - def _truncate(self): - if not self or len(self) <= self._max_size: - return - for nh in list(self._cache.keys()): - try: - del self._cache[nh] - except KeyError: - pass - if not self or len(self) <= self._max_size: - break - - def keys(self): - return self._cache.keys() - - def values(self): - return self._cache.values() - - def items(self): - return self._cache.items() - - def clear(self): - return self._cache.clear() - - def _hash(self, v): - if type(v) == int: - return v - return hash(str(v)) - - def __contains__(self, item): - return self._hash(item) in self._cache - - def __iter__(self): - return iter(self._cache) - - def __getitem__(self, item): - return self.get(item) - - def __setitem__(self, item, value): - self.put(item, value) - - def __bool__(self): - return bool(self._cache) - - def __len__(self): - return len(self._cache) diff --git a/bbot/core/helpers/depsinstaller/installer.py b/bbot/core/helpers/depsinstaller/installer.py index 00662b969..049baef86 100644 --- a/bbot/core/helpers/depsinstaller/installer.py +++ b/bbot/core/helpers/depsinstaller/installer.py @@ -157,9 +157,9 @@ async def pip_install(self, packages, constraints=None): command = [sys.executable, "-m", "pip", "install", "--upgrade"] + packages if constraints: - contraints_tempfile = self.parent_helper.tempfile(constraints, pipe=False) + constraints_tempfile = self.parent_helper.tempfile(constraints, pipe=False) command.append("--constraint") - command.append(contraints_tempfile) + command.append(constraints_tempfile) process = None try: diff --git a/bbot/core/helpers/dns.py b/bbot/core/helpers/dns.py index 1ff4d5609..6cbaf9f8e 100644 --- a/bbot/core/helpers/dns.py +++ b/bbot/core/helpers/dns.py @@ -7,13 +7,14 @@ import contextlib import dns.exception import dns.asyncresolver +from cachetools import LRUCache from contextlib import suppress from .regexes import dns_name_regex from bbot.core.helpers.ratelimiter import RateLimiter from bbot.core.helpers.async_helpers import 
NamedLock from bbot.core.errors import ValidationError, DNSError, DNSWildcardBreak -from .misc import is_ip, is_domain, is_dns_name, domain_parents, parent_domain, rand_string, cloudcheck, as_completed +from .misc import is_ip, is_domain, is_dns_name, domain_parents, parent_domain, rand_string, cloudcheck log = logging.getLogger("bbot.core.helpers.dns") @@ -64,8 +65,8 @@ class DNSHelper: wildcard_ignore (tuple): Domains to be ignored during wildcard detection. wildcard_tests (int): Number of tests to be run for wildcard detection. Defaults to 5. _wildcard_cache (dict): Cache for wildcard detection results. - _dns_cache (CacheDict): Cache for DNS resolution results, limited in size. - _event_cache (CacheDict): Cache for event resolution results, tags. Limited in size. + _dns_cache (LRUCache): Cache for DNS resolution results, limited in size. + _event_cache (LRUCache): Cache for event resolution results, tags. Limited in size. resolver_file (Path): File containing system's current resolver nameservers. filter_bad_ptrs (bool): Whether to filter out DNS names that appear to be auto-generated PTR records. Defaults to True. @@ -130,8 +131,8 @@ def __init__(self, parent_helper): self.fallback_nameservers_file = self.parent_helper.wordlist_dir / "nameservers.txt" self._debug = self.parent_helper.config.get("dns_debug", False) self._dummy_modules = dict() - self._dns_cache = self.parent_helper.CacheDict(max_size=100000) - self._event_cache = self.parent_helper.CacheDict(max_size=10000) + self._dns_cache = LRUCache(maxsize=10000) + self._event_cache = LRUCache(maxsize=10000) self._event_cache_locks = NamedLock() # for mocking DNS queries @@ -530,9 +531,8 @@ async def resolve_event(self, event, minimal=False): types = ("A", "AAAA") if types: - tasks = [self.resolve_raw(event_host, type=t, use_cache=True) for t in types] - async for task in as_completed(tasks): - resolved_raw, errors = await task + for t in types: + resolved_raw, errors = await self.resolve_raw(event_host, type=t, use_cache=True) for rdtype, e in errors: if rdtype not in resolved_raw: event_tags.add(f"{rdtype.lower()}-error") @@ -631,24 +631,13 @@ def event_cache_get(self, host): except KeyError: return set(), None, None, set() - async def _resolve_batch_coro_wrapper(self, q, **kwargs): - """ - Helps us correlate task results back to their original arguments - """ - result = await self.resolve(q, **kwargs) - return (q, result) - async def resolve_batch(self, queries, **kwargs): """ - Asynchronously resolves a batch of queries in parallel and yields the results as they are completed. - - This method wraps around `_resolve_batch_coro_wrapper` to resolve a list of queries in parallel. - It batches the queries to a manageable size and executes them asynchronously, respecting - global rate limits. + A helper to execute a bunch of DNS requests. Args: queries (list): List of queries to resolve. - **kwargs: Additional keyword arguments to pass to `_resolve_batch_coro_wrapper`. + **kwargs: Additional keyword arguments to pass to `resolve()`. Yields: tuple: A tuple containing the original query and its resolved value. 
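
> As an aside on the `CacheDict` → `cachetools.LRUCache` migration in the hunks above: here is a minimal sketch of how the `NamedLock` pattern is meant to be consumed, assuming only the `lock()` async context manager defined in `async_helpers.py`. The `cached_lookup` helper and `resolver` coroutine are hypothetical stand-ins, not bbot API; `dns.py` uses the same shape via `_event_cache_locks` to keep duplicate in-flight lookups from racing each other.

```python
from cachetools import LRUCache
from bbot.core.helpers.async_helpers import NamedLock

named_locks = NamedLock(max_size=1000)  # one asyncio.Lock per name, LRU-evicted
dns_cache = LRUCache(maxsize=10000)

async def cached_lookup(host, resolver):
    # Concurrent callers asking about the same host share one lock, so the
    # lookup runs once; lookups for different hosts still proceed in parallel.
    async with named_locks.lock(host):
        if host not in dns_cache:
            dns_cache[host] = await resolver(host)  # hypothetical coroutine
        return dns_cache[host]
```
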
@@ -662,13 +651,8 @@ async def resolve_batch(self, queries, **kwargs): ('evilcorp.com', {'2.2.2.2'}) """ - queries = list(queries) - batch_size = 250 - for i in range(0, len(queries), batch_size): - batch = queries[i : i + batch_size] - tasks = [asyncio.create_task(self._resolve_batch_coro_wrapper(q, **kwargs)) for q in batch] - async for task in as_completed(tasks): - yield await task + for q in queries: + yield (q, await self.resolve(q, **kwargs)) def extract_targets(self, record): """ @@ -841,14 +825,11 @@ async def is_wildcard(self, query, ips=None, rdtype=None): # if the caller hasn't already done the work of resolving the IPs if ips is None: # then resolve the query for all rdtypes - base_query_tasks = { - t: asyncio.create_task(self.resolve_raw(query, type=t, use_cache=True)) for t in rdtypes_to_check - } - for _rdtype, task in base_query_tasks.items(): - raw_results, errors = await task + for t in rdtypes_to_check: + raw_results, errors = await self.resolve_raw(query, type=t, use_cache=True) if errors and not raw_results: - self.debug(f"Failed to resolve {query} ({_rdtype}) during wildcard detection") - result[_rdtype] = (None, parent) + self.debug(f"Failed to resolve {query} ({t}) during wildcard detection") + result[t] = (None, parent) continue for __rdtype, answers in raw_results: base_query_results = set() @@ -872,12 +853,13 @@ async def is_wildcard(self, query, ips=None, rdtype=None): # for every parent domain, starting with the shortest try: for host in parents[::-1]: + # make sure we've checked that domain for wildcards + await self.is_wildcard_domain(host) + # for every rdtype for _rdtype in list(base_query_ips): # get the IPs from above query_ips = base_query_ips.get(_rdtype, set()) - # make sure we've checked that domain for wildcards - await self.is_wildcard_domain(host) host_hash = hash(host) if host_hash in self._wildcard_cache: @@ -953,24 +935,20 @@ async def is_wildcard_domain(self, domain, log_info=False): wildcard_domain_results[host] = self._wildcard_cache[host_hash] continue + log.verbose(f"Checking if {host} is a wildcard") + # determine if this is a wildcard domain - wildcard_tasks = {t: [] for t in rdtypes_to_check} + # resolve a bunch of random subdomains of the same parent - for rdtype in rdtypes_to_check: + is_wildcard = False + wildcard_results = dict() + for rdtype in list(rdtypes_to_check): # continue if a wildcard was already found for this rdtype # if rdtype in self._wildcard_cache[host_hash]: # continue for _ in range(self.wildcard_tests): rand_query = f"{rand_string(digits=False, length=10)}.{host}" - wildcard_task = asyncio.create_task(self.resolve(rand_query, type=rdtype, use_cache=False)) - wildcard_tasks[rdtype].append(wildcard_task) - - # combine the random results - is_wildcard = False - wildcard_results = dict() - for rdtype, tasks in wildcard_tasks.items(): - async for task in as_completed(tasks): - results = await task + results = await self.resolve(rand_query, type=rdtype, use_cache=False) if results: is_wildcard = True if not rdtype in wildcard_results: @@ -989,6 +967,8 @@ async def is_wildcard_domain(self, domain, log_info=False): if log_info: log_fn = log.info log_fn(f"Encountered domain with wildcard DNS ({wildcard_rdtypes_str}): {host}") + else: + log.verbose(f"Finished checking {host}, it is not a wildcard") return wildcard_domain_results @@ -1035,7 +1015,7 @@ def _parse_rdtype(self, t, default=None): def debug(self, *args, **kwargs): if self._debug: - log.debug(*args, **kwargs) + log.trace(*args, **kwargs) def _get_dummy_module(self, 
name): try: diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py index 36c6346c9..899f3ab0b 100644 --- a/bbot/core/helpers/helper.py +++ b/bbot/core/helpers/helper.py @@ -48,8 +48,8 @@ class ConfigAwareHelper: from . import regexes from . import validators from .files import tempfile, feed_pipe, _feed_pipe, tempfile_tail + from .cache import cache_get, cache_put, cache_filename, is_cached from .command import run, run_live, _spawn_proc, _prepare_command_kwargs - from .cache import cache_get, cache_put, cache_filename, is_cached, CacheDict def __init__(self, config, scan=None): self.config = config diff --git a/bbot/core/helpers/interactsh.py b/bbot/core/helpers/interactsh.py index 871ecb1c4..aad4a169f 100644 --- a/bbot/core/helpers/interactsh.py +++ b/bbot/core/helpers/interactsh.py @@ -127,8 +127,9 @@ async def register(self, callback=None): if self.custom_server: if not self.token: log.verbose("Interact.sh token is not set") - headers["Authorization"] = self.token - self.server_list = [self.custom_server] + else: + headers["Authorization"] = self.token + self.server_list = [str(self.custom_server)] else: self.server_list = random.sample(server_list, k=len(server_list)) for server in self.server_list: diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index 283bc3782..a67e89402 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -66,9 +66,14 @@ def is_domain(d): - Port, if present in input, is ignored. """ d, _ = split_host_port(d) + if is_ip(d): + return False extracted = tldextract(d) - if extracted.domain and not extracted.subdomain: - return True + if extracted.registered_domain: + if not extracted.subdomain: + return True + else: + return d.count(".") == 1 return False @@ -96,9 +101,14 @@ def is_subdomain(d): - Port, if present in input, is ignored. """ d, _ = split_host_port(d) + if is_ip(d): + return False extracted = tldextract(d) - if extracted.domain and extracted.subdomain: - return True + if extracted.registered_domain: + if extracted.subdomain: + return True + else: + return d.count(".") > 1 return False @@ -327,6 +337,23 @@ def domain_parents(d, include_self=False): break +def subdomain_depth(d): + """ + Calculate the depth of subdomains within a given domain name. + + Args: + d (str): The domain name to analyze. + + Returns: + int: The depth of the subdomain. For example, a hostname "5.4.3.2.1.evilcorp.com" + has a subdomain depth of 5. + """ + subdomain, domain = split_domain(d) + if not subdomain: + return 0 + return subdomain.count(".") + 1 + + def parent_url(u): """ Retrieve the parent URL of a given URL. @@ -601,9 +628,6 @@ def is_ip(d, version=None): >>> is_ip('evilcorp.com') False """ - if isinstance(d, (ipaddress.IPv4Address, ipaddress.IPv6Address)): - if version is None or version == d.version: - return True try: ip = ipaddress.ip_address(d) if version is None or ip.version == version: @@ -1427,7 +1451,7 @@ def search_dict_values(d, *regexes): ... ] ... } ... 
}
-    >>> url_regexes = re.compile(r'https?://[^\s<>"]+|www\.[^\s<>"]+')
+    >>> url_regexes = re.compile(r'https?://[^\\s<>"]+|www\.[^\\s<>"]+')
     >>> list(search_dict_values(dict_to_search, url_regexes))
     ["https://www.evilcorp.com"]
     """
diff --git a/bbot/core/helpers/ntlm.py b/bbot/core/helpers/ntlm.py
index e4d9cd1ca..8605ef34a 100644
--- a/bbot/core/helpers/ntlm.py
+++ b/bbot/core/helpers/ntlm.py
@@ -38,7 +38,7 @@ def __init__(self, pos_tup, raw):
     def decode_ntlm_challenge(st):
-        hdr_tup = struct.unpack(">> soup = self.helpers.beautifulsoup(event.data["body"], "html.parser")
+        Performs an HTML parse of the 'markup' argument and returns a soup instance
+
+        >>> email_type = soup.find(type="email")
+        Searches the soup instance for all occurrences of the passed-in argument
+        """
+        try:
+            soup = BeautifulSoup(
+                markup, features, builder, parse_only, from_encoding, exclude_encodings, element_classes, **kwargs
+            )
+            return soup
+        except Exception as e:
+            log.debug(f"Error parsing beautifulsoup: {e}")
+            return False
+
     def ssl_context_noverify(self):
         if self._ssl_context_noverify is None:
             ssl_context = ssl.create_default_context()
@@ -616,6 +675,12 @@ async def _acatch(self, url, raise_error):
             log.trace(traceback.format_exc())
             if raise_error:
                 raise httpx.RequestError(msg)
+        except SOCKSError as e:
+            msg = f"SOCKS error with request to URL: {url}: {e}"
+            log.trace(msg)
+            log.trace(traceback.format_exc())
+            if raise_error:
+                raise httpx.RequestError(msg)
         except BaseException as e:
             # don't log if the error is the result of an intentional cancellation
             if not any(
diff --git a/bbot/modules/anubisdb.py b/bbot/modules/anubisdb.py
index 7b0cda171..9864e3c6d 100644
--- a/bbot/modules/anubisdb.py
+++ b/bbot/modules/anubisdb.py
@@ -6,6 +6,10 @@ class anubisdb(subdomain_enum):
     watched_events = ["DNS_NAME"]
     produced_events = ["DNS_NAME"]
     meta = {"description": "Query jldc.me's database for subdomains"}
+    options = {"limit": 1000}
+    options_desc = {
+        "limit": "Limit the number of subdomains returned per query (increasing this may slow the scan due to garbage results from this API)"
+    }
     base_url = "https://jldc.me/anubis/subdomains"
     dns_abort_depth = 5
@@ -36,6 +40,9 @@ def parse_results(self, r, query):
         if json:
             for hostname in json:
                 hostname = str(hostname).lower()
-                if hostname.endswith(f".{query}") and not self.abort_if_pre(hostname):
+                in_scope = hostname.endswith(f".{query}")
+                is_ptr = self.helpers.is_ptr(hostname)
+                too_long = self.abort_if_pre(hostname)
+                if in_scope and not is_ptr and not too_long:
                     results.add(hostname)
-        return results
+        return sorted(results)[: self.config.get("limit", 1000)]
diff --git a/bbot/modules/base.py b/bbot/modules/base.py
index be0d118c6..d888bf699 100644
--- a/bbot/modules/base.py
+++ b/bbot/modules/base.py
@@ -104,7 +104,7 @@ class BaseModule:
     _preserve_graph = False
     _stats_exclude = False
-    _qsize = 100
+    _qsize = 1000
     _priority = 3
     _name = "base"
     _type = "scan"
@@ -297,7 +297,7 @@ async def ping(self):
     def batch_size(self):
         batch_size = self.config.get("batch_size", None)
         # only allow overriding the batch size if its default value is greater than 1
-        # this prevents modules from being accidentally neutered by an incorect batch_size setting
+        # this prevents modules from being accidentally neutered by an incorrect batch_size setting
         if batch_size is None or self._batch_size == 1:
             batch_size = self._batch_size
         return batch_size
@@ -358,12 +358,12 @@ async def _handle_batch(self):
         events, finish = await self._events_waiting()
         if events and not self.errored:
             counter.n = 
len(events) - self.debug(f"Handling batch of {len(events):,} events") + self.verbose(f"Handling batch of {len(events):,} events") submitted = True async with self.scan._acatch(f"{self.name}.handle_batch()"): handle_batch_task = asyncio.create_task(self.handle_batch(*events)) await handle_batch_task - self.debug(f"Finished handling batch of {len(events):,} events") + self.verbose(f"Finished handling batch of {len(events):,} events") if finish: context = f"{self.name}.finish()" async with self.scan._acatch(context), self._task_counter.count(context): diff --git a/bbot/modules/bevigil.py b/bbot/modules/bevigil.py index ff868e969..435ceae08 100644 --- a/bbot/modules/bevigil.py +++ b/bbot/modules/bevigil.py @@ -34,7 +34,7 @@ async def handle_event(self, event): if self.urls: urls = await self.query(query, request_fn=self.request_urls, parse_fn=self.parse_urls) if urls: - for parsed_url in await self.scan.run_in_executor(self.helpers.validators.collapse_urls, urls): + for parsed_url in await self.scan.run_in_executor_mp(self.helpers.validators.collapse_urls, urls): await self.emit_event(parsed_url.geturl(), "URL_UNVERIFIED", source=event) async def request_subdomains(self, query): diff --git a/bbot/modules/deadly/ffuf.py b/bbot/modules/deadly/ffuf.py index f7ba3d96e..2c27f055b 100644 --- a/bbot/modules/deadly/ffuf.py +++ b/bbot/modules/deadly/ffuf.py @@ -143,7 +143,7 @@ async def baseline_ffuf(self, url, exts=[""], prefix="", suffix="", mode="normal # if we only got 403, we might already be blocked by a WAF. Issue a warning, but it's possible all 'not founds' are given 403 if canary_results[0]["status"] == 403: self.warning( - "All requests of the baseline recieved a 403 response. It is possible a WAF is actively blocking your traffic." + "All requests of the baseline received a 403 response. It is possible a WAF is actively blocking your traffic." ) # if we only got 429, we are almost certainly getting blocked by a WAF or rate-limiting. Specifically with 429, we should respect them and abort the scan. 
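
> To make the ffuf baseline logic above easier to follow, here is a compact sketch of the decision it encodes. This is illustrative only: `canary_results` mirrors the list-of-dicts shape visible in the hunk, and the string verdicts are hypothetical rather than ffuf.py's actual control flow, which warns inline and aborts on 429.

```python
def baseline_verdict(canary_results):
    # canary_results: responses to probes for known-nonexistent pages
    statuses = {r["status"] for r in canary_results}
    if statuses == {403}:
        # every "not found" probe returned 403: a WAF may be blocking traffic,
        # so warn, but keep scanning
        return "warn"
    if statuses == {429}:
        # uniform 429s indicate rate limiting; respect it and abort the scan
        return "abort"
    return "ok"
```
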
diff --git a/bbot/modules/deadly/nuclei.py b/bbot/modules/deadly/nuclei.py index be24599ac..04be1aa95 100644 --- a/bbot/modules/deadly/nuclei.py +++ b/bbot/modules/deadly/nuclei.py @@ -85,7 +85,7 @@ async def setup(self): self.info(f"Excluding the following nuclei tags: [{self.etags}]") self.severity = self.config.get("severity") if self.mode != "severe" and self.severity != "": - self.info(f"Limiting nuclei templates to the following severites: [{self.severity}]") + self.info(f"Limiting nuclei templates to the following severities: [{self.severity}]") self.iserver = self.scan.config.get("interactsh_server", None) self.itoken = self.scan.config.get("interactsh_token", None) self.retries = int(self.config.get("retries", 0)) @@ -120,7 +120,7 @@ async def setup(self): self.info("Processing nuclei templates to perform budget calculations...") self.nucleibudget = NucleiBudget(self) - self.budget_templates_file = self.helpers.tempfile(self.nucleibudget.collapsable_templates, pipe=False) + self.budget_templates_file = self.helpers.tempfile(self.nucleibudget.collapsible_templates, pipe=False) self.info( f"Loaded [{str(sum(self.nucleibudget.severity_stats.values()))}] templates based on a budget of [{str(self.budget)}] request(s)" @@ -295,7 +295,7 @@ def __init__(self, nuclei_module): self.templates_dir = nuclei_module.nuclei_templates_dir self.yaml_list = self.get_yaml_list() self.budget_paths = self.find_budget_paths(nuclei_module.budget) - self.collapsable_templates, self.severity_stats = self.find_collapsable_templates() + self.collapsible_templates, self.severity_stats = self.find_collapsible_templates() def get_yaml_list(self): return list(self.templates_dir.rglob("*.yaml")) @@ -331,8 +331,8 @@ def get_yaml_info_attr(self, yf, attr): yield res # Parse through all templates and locate those which match the conditions necessary to collapse down to the budget setting - def find_collapsable_templates(self): - collapsable_templates = [] + def find_collapsible_templates(self): + collapsible_templates = [] severity_dict = {} for yf in self.yaml_list: valid = True @@ -365,14 +365,14 @@ def find_collapsable_templates(self): valid = False if valid: - collapsable_templates.append(str(yf)) + collapsible_templates.append(str(yf)) severity_gen = self.get_yaml_info_attr(yf, "severity") severity = next(severity_gen) if severity in severity_dict.keys(): severity_dict[severity] += 1 else: severity_dict[severity] = 1 - return collapsable_templates, severity_dict + return collapsible_templates, severity_dict def parse_yaml(self, yamlfile): if yamlfile not in self._yaml_files: diff --git a/bbot/modules/dnscommonsrv.py b/bbot/modules/dnscommonsrv.py index 958b6b612..eef8e2d8c 100644 --- a/bbot/modules/dnscommonsrv.py +++ b/bbot/modules/dnscommonsrv.py @@ -1,91 +1,151 @@ from bbot.modules.base import BaseModule +# the following are the result of a 1-day internet survey to find the top SRV records +# the scan resulted in 36,282 SRV records. the count for each one is shown. 
common_srvs = [ - # Micro$oft - "_ldap._tcp.dc._msdcs", - "_ldap._tcp.gc._msdcs", - "_ldap._tcp.pdc._msdcs", - "_ldap._tcp", - "_ldap._tcp.ForestDNSZones", - "_gc._msdcs", - "_kpasswd._tcp", - "_kpasswd._udp", - "_kerberos._tcp.dc._msdcs", - "_kerberos.tcp.dc._msdcs", - "_kerberos-master._tcp", - "_kerberos-master._udp", - "_kerberos._tcp", - "_kerberos._udp", - "_autodiscover._tcp", - # NTP - "_ntp._udp", - # mDNS - "_nntp._tcp", - # email - "_imap._tcp", - "_imap.tcp", - "_imaps._tcp", - "_pop3._tcp", - "_pop3s._tcp", - "_smtp._tcp", - # MailEnable - "_caldav._tcp", - "_caldavs._tcp", - "_carddav._tcp", - "_carddavs._tcp", - # STUN - "_stun._tcp", - "_stun._udp", - "_stuns._tcp", - "_turn._tcp", - "_turn._udp", - "_turns._tcp", - # SIP - "_h323be._tcp", - "_h323be._udp", - "_h323cs._tcp", - "_h323cs._udp", - "_h323ls._tcp", - "_h323ls._udp", - "_sip._tcp", - "_sip._tls", - "_sip._udp", - "_sipfederationtls._tcp", - "_sipinternal._tcp", - "_sipinternaltls._tcp", - "_sips._tcp", - # misc - "_aix._tcp", - "_certificates._tcp", - "_cmp._tcp", - "_crl._tcp", - "_crls._tcp", - "_finger._tcp", - "_ftp._tcp", - "_gc._tcp", - "_hkp._tcp", - "_hkps._tcp", - "_http._tcp", - "_https._tcp", - "_jabber-client._tcp", - "_jabber-client._udp", - "_jabber._tcp", - "_jabber._udp", - "_ocsp._tcp", - "_pgpkeys._tcp", - "_pgprevokations._tcp", - "_PKIXREP._tcp", - "_submission._tcp", - "_svcp._tcp", - "_telnet._tcp", - "_test._tcp", - "_whois._tcp", - "_x-puppet-ca._tcp", - "_x-puppet._tcp", - "_xmpp-client._tcp", - "_xmpp-client._udp", - "_xmpp-server._tcp", - "_xmpp-server._udp", + "_sipfederationtls._tcp", # 6909 + "_sip._tls", # 6853 + "_autodiscover._tcp", # 4268 + "_xmpp-server._tcp", # 1437 + "_sip._tcp", # 1193 + "_sips._tcp", # 1183 + "_caldavs._tcp", # 1179 + "_carddavs._tcp", # 1132 + "_caldav._tcp", # 1035 + "_carddav._tcp", # 1024 + "_sip._udp", # 1007 + "_imaps._tcp", # 1007 + "_submission._tcp", # 906 + "_h323cs._tcp", # 846 + "_h323ls._udp", # 782 + "_xmpp-client._tcp", # 689 + "_pop3s._tcp", # 394 + "_jabber._tcp", # 277 + "_imap._tcp", # 267 + "_turn._udp", # 256 + "_pop3._tcp", # 221 + "_ldap._tcp", # 213 + "_smtps._tcp", # 195 + "_sipinternaltls._tcp", # 192 + "_vlmcs._tcp", # 165 + "_kerberos._udp", # 163 + "_kerberos._tcp", # 148 + "_kpasswd._udp", # 128 + "_kpasswd._tcp", # 100 + "_ntp._udp", # 90 + "_gc._tcp", # 73 + "_kerberos-master._udp", # 66 + "_ldap._tcp.dc._msdcs", # 63 + "_matrix._tcp", # 62 + "_smtp._tcp", # 61 + "_stun._udp", # 57 + "_kerberos._tcp.dc._msdcs", # 54 + "_ldap._tcp.gc._msdcs", # 49 + "_kerberos-adm._tcp", # 44 + "_ldap._tcp.pdc._msdcs", # 43 + "_kerberos-master._tcp", # 43 + "_http._tcp", # 37 + "_h323rs._tcp", # 36 + "_sipinternal._tcp", # 35 + "_turn._tcp", # 33 + "_stun._tcp", # 33 + "_h323ls._tcp", # 33 + "_x-puppet._tcp", # 30 + "_h323cs._udp", # 27 + "_stuns._tcp", # 26 + "_jabber-client._tcp", # 25 + "_x-puppet-ca._tcp", # 22 + "_ts3._udp", # 22 + "_minecraft._tcp", # 22 + "_turns._tcp", # 21 + "_ldaps._tcp", # 21 + "_xmpps-client._tcp", # 20 + "_https._tcp", # 19 + "_ftp._tcp", # 19 + "_xmpp-server._udp", # 18 + "_xmpp-client._udp", # 17 + "_jabber._udp", # 17 + "_jabber-client._udp", # 17 + "_xmpps-server._tcp", # 15 + "_finger._tcp", # 14 + "_stuns._udp", # 12 + "_hkp._tcp", # 12 + "_vlmcs._udp", # 11 + "_turns._udp", # 11 + "_tftp._udp", # 11 + "_ssh._tcp", # 11 + "_rtps._udp", # 11 + "_mysqlsrv._tcp", # 11 + "_hkps._tcp", # 11 + "_h323be._udp", # 11 + "_dns._tcp", # 11 + "_wss._tcp", # 10 + "_wpad._tcp", # 10 + "_whois._tcp", # 10 + 
"_webexconnect._tcp", # 10 + "_webexconnects._tcp", # 10 + "_vnc._tcp", # 10 + "_test._tcp", # 10 + "_telnet._tcp", # 10 + "_telnets._tcp", # 10 + "_teamspeak._tcp", # 10 + "_svns._tcp", # 10 + "_svcp._tcp", # 10 + "_smb._tcp", # 10 + "_sip-tls._tcp", # 10 + "_sftp._tcp", # 10 + "_secure-pop3._tcp", # 10 + "_secure-imap._tcp", # 10 + "_rtsp._tcp", # 10 + "_rtps._tcp", # 10 + "_rpc._tcp", # 10 + "_rfb._tcp", # 10 + "_raop._tcp", # 10 + "_pstn._tcp", # 10 + "_presence._tcp", # 10 + "_pkixrep._tcp", # 10 + "_pgprevokations._tcp", # 10 + "_pgpkeys._tcp", # 10 + "_ocsp._tcp", # 10 + "_nntp._tcp", # 10 + "_nfs._tcp", # 10 + "_netbios-ssn._tcp", # 10 + "_netbios-ns._tcp", # 10 + "_netbios-dgm._tcp", # 10 + "_mumble._tcp", # 10 + "_msrpc._tcp", # 10 + "_mqtts._tcp", # 10 + "_minecraft._udp", # 10 + "_iscsi._tcp", # 10 + "_ircs._tcp", # 10 + "_ipp._tcp", # 10 + "_ipps._tcp", # 10 + "_h323be._tcp", # 10 + "_gits._tcp", # 10 + "_ftps._tcp", # 10 + "_ftpes._tcp", # 10 + "_dnss._udp", # 10 + "_dnss._tcp", # 10 + "_diameter._tcp", # 10 + "_crl._tcp", # 10 + "_crls._tcp", # 10 + "_cmp._tcp", # 10 + "_certificates._tcp", # 10 + "_aix._tcp", # 10 + "_afpovertcp._tcp", # 10 + "_collab-edge._tls", # 6 + "_tcp", # 5 + "_wildcard", # 3 + "_client._smtp", # 3 + "_udp", # 2 + "_tls", # 2 + "_msdcs", # 2 + "_gc._msdcs", # 2 + "_ldaps._tcp.dc._msdcs", # 1 + "_kerberos._tcp.kdc._msdcs", # 1 + "_kerberos.tcp.dc._msdcs", # 1 + "_imap", # 1 + "_iax", # 1 ] @@ -94,7 +154,17 @@ class dnscommonsrv(BaseModule): produced_events = ["DNS_NAME"] flags = ["subdomain-enum", "passive", "safe"] meta = {"description": "Check for common SRV records"} - _max_event_handlers = 5 + options = {"top": 50, "max_event_handlers": 10} + options_desc = { + "top": "How many of the top SRV records to check", + "max_event_handlers": "How many instances of the module to run concurrently", + } + _max_event_handlers = 10 + + def _incoming_dedup_hash(self, event): + # dedupe by parent + parent_domain = self.helpers.parent_domain(event.data) + return hash(parent_domain), "already processed parent domain" async def filter_event(self, event): # skip SRV wildcards @@ -103,7 +173,9 @@ async def filter_event(self, event): return True async def handle_event(self, event): - queries = [event.data] + [f"{srv}.{event.data}" for srv in common_srvs] + top = int(self.config.get("top", 50)) + parent_domain = self.helpers.parent_domain(event.data) + queries = [f"{srv}.{parent_domain}" for srv in common_srvs[:top]] async for query, results in self.helpers.resolve_batch(queries, type="srv"): if results: await self.emit_event(query, "DNS_NAME", tags=["srv-record"], source=event) diff --git a/bbot/modules/dnsdumpster.py b/bbot/modules/dnsdumpster.py index 8bb1fa1ed..c119857be 100644 --- a/bbot/modules/dnsdumpster.py +++ b/bbot/modules/dnsdumpster.py @@ -1,5 +1,4 @@ import re -from bs4 import BeautifulSoup from bbot.modules.templates.subdomain_enum import subdomain_enum @@ -25,7 +24,7 @@ async def query(self, domain): return ret else: self.debug(f'Valid response code "{status_code}" from DNSDumpster') - html = BeautifulSoup(res1.content, "html.parser") + html = self.helpers.beautifulsoup(res1.content, "html.parser") csrftoken = None csrfmiddlewaretoken = None try: @@ -73,7 +72,7 @@ async def query(self, domain): self.verbose(f'Bad response code "{status_code}" from DNSDumpster') return ret - html = BeautifulSoup(res2.content, "html.parser") + html = self.helpers.beautifulsoup(res2.content, "html.parser") escaped_domain = re.escape(domain) match_pattern = 
re.compile(r"^[\w\.-]+\." + escaped_domain + r"$") for subdomain in html.findAll(text=match_pattern): diff --git a/bbot/modules/ffuf_shortnames.py b/bbot/modules/ffuf_shortnames.py index c062f09df..562ae681f 100644 --- a/bbot/modules/ffuf_shortnames.py +++ b/bbot/modules/ffuf_shortnames.py @@ -114,7 +114,7 @@ def build_extension_list(self, event): else: return [extension_hint] - def find_delimeter(self, hint): + def find_delimiter(self, hint): delimiters = ["_", "-"] for d in delimiters: if d in hint: @@ -169,13 +169,13 @@ async def handle_event(self, event): if self.config.get("find_delimiters"): if "shortname-directory" in event.tags: - delimeter_r = self.find_delimeter(filename_hint) - if delimeter_r: - delimeter, prefix, partial_hint = delimeter_r - self.verbose(f"Detected delimeter [{delimeter}] in hint [{filename_hint}]") + delimiter_r = self.find_delimiter(filename_hint) + if delimiter_r: + delimiter, prefix, partial_hint = delimiter_r + self.verbose(f"Detected delimiter [{delimiter}] in hint [{filename_hint}]") tempfile, tempfile_len = self.generate_templist(prefix=partial_hint) async for r in self.execute_ffuf( - tempfile, root_url, prefix=f"{prefix}{delimeter}", exts=["/"] + tempfile, root_url, prefix=f"{prefix}{delimiter}", exts=["/"] ): await self.emit_event( r["url"], "URL_UNVERIFIED", source=event, tags=[f"status-{r['status']}"] @@ -183,13 +183,13 @@ async def handle_event(self, event): elif "shortname-file" in event.tags: for ext in used_extensions: - delimeter_r = self.find_delimeter(filename_hint) - if delimeter_r: - delimeter, prefix, partial_hint = delimeter_r - self.verbose(f"Detected delimeter [{delimeter}] in hint [{filename_hint}]") + delimiter_r = self.find_delimiter(filename_hint) + if delimiter_r: + delimiter, prefix, partial_hint = delimiter_r + self.verbose(f"Detected delimiter [{delimiter}] in hint [{filename_hint}]") tempfile, tempfile_len = self.generate_templist(prefix=partial_hint) async for r in self.execute_ffuf( - tempfile, root_url, prefix=f"{prefix}{delimeter}", suffix=f".{ext}" + tempfile, root_url, prefix=f"{prefix}{delimiter}", suffix=f".{ext}" ): await self.emit_event( r["url"], "URL_UNVERIFIED", source=event, tags=[f"status-{r['status']}"] diff --git a/bbot/modules/github_codesearch.py b/bbot/modules/github_codesearch.py index a138b4399..fdc58695b 100644 --- a/bbot/modules/github_codesearch.py +++ b/bbot/modules/github_codesearch.py @@ -53,8 +53,8 @@ async def query(self, query): if not items: break for item in items: - htlm_url = item.get("html_url", "") - raw_url = self.raw_url(htlm_url) + html_url = item.get("html_url", "") + raw_url = self.raw_url(html_url) repo_url = item.get("repository", {}).get("html_url", "") if raw_url and repo_url: try: diff --git a/bbot/modules/internetdb.py b/bbot/modules/internetdb.py index b3e98b9fc..847db0c7a 100644 --- a/bbot/modules/internetdb.py +++ b/bbot/modules/internetdb.py @@ -40,8 +40,7 @@ class internetdb(BaseModule): flags = ["passive", "safe", "portscan", "subdomain-enum"] meta = {"description": "Query Shodan's InternetDB for open ports, hostnames, technologies, and vulnerabilities"} - # limit outgoing queue size to help avoid rate limiting - _qsize = 100 + _qsize = 500 base_url = "https://internetdb.shodan.io" @@ -116,10 +115,11 @@ def get_ip(self, event): elif event.type == "DNS_NAME": # always try IPv4 first ipv6 = [] - for host in event.resolved_hosts: - if self.helpers.is_ip(host, version=4): - return host - elif self.helpers.is_ip(host, version=6): - ipv6.append(host) + ips = [h for h in 
event.resolved_hosts if self.helpers.is_ip(h)] + for ip in sorted([str(ip) for ip in ips]): + if self.helpers.is_ip(ip, version=4): + return ip + elif self.helpers.is_ip(ip, version=6): + ipv6.append(ip) for ip in ipv6: return ip diff --git a/bbot/modules/masscan.py b/bbot/modules/masscan.py index 15881d5b2..895ffe243 100644 --- a/bbot/modules/masscan.py +++ b/bbot/modules/masscan.py @@ -241,9 +241,11 @@ async def emit_from_cache(self): await self.emit_event(line, "OPEN_TCP_PORT", source=source_event) def get_source_event(self, host): - source_event = self.scan.whitelist.get(host) + source_event = self.scan.target.get(host) if source_event is None: - source_event = self.scan.root_event + source_event = self.scan.whitelist.get(host) + if source_event is None: + source_event = self.scan.root_event return source_event async def cleanup(self): diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py index 965909d31..02ff1aa85 100644 --- a/bbot/modules/massdns.py +++ b/bbot/modules/massdns.py @@ -1,6 +1,7 @@ import re import json import random +import asyncio import subprocess from bbot.modules.templates.subdomain_enum import subdomain_enum @@ -13,8 +14,8 @@ class massdns(subdomain_enum): It uses massdns to brute-force subdomains. At the end of a scan, it will leverage BBOT's word cloud to recursively discover target-specific subdomain mutations. - Each subdomain discovered via mutations is tagged with the "mutation" tag. This tag includes the depth at which - the mutations is found. I.e. the first mutation will be tagged "mutation-1". The second one (a mutation of a + Each subdomain discovered via mutations is tagged with the "mutation" tag. This tag indicates the depth at which + the mutation was found. I.e. the first mutation will be tagged "mutation-1". The second one (a mutation of a mutation) will be "mutation-2". Mutations of mutations of mutations will be "mutation-3", etc. This is especially use for bug bounties because it enables you to recognize distant/rare subdomains at a glance. @@ -29,11 +30,13 @@ class massdns(subdomain_enum): "wordlist": "https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/DNS/subdomains-top1million-5000.txt", "max_resolvers": 1000, "max_mutations": 500, + "max_depth": 5, } options_desc = { "wordlist": "Subdomain wordlist URL", "max_resolvers": "Number of concurrent massdns resolvers", "max_mutations": "Max number of smart mutations per subdomain", + "max_depth": "How many subdomains deep to brute force, i.e. 
5.4.3.2.1.evilcorp.com", } subdomain_file = None deps_ansible = [ @@ -72,7 +75,7 @@ class massdns(subdomain_enum): }, ] reject_wildcards = "strict" - _qsize = 100 + _qsize = 10000 digit_regex = re.compile(r"\d+") @@ -89,6 +92,7 @@ async def setup(self): self.max_resolvers = self.config.get("max_resolvers", 1000) self.max_mutations = self.config.get("max_mutations", 500) + self.max_depth = max(1, self.config.get("max_depth", 5)) nameservers_url = ( "https://raw.githubusercontent.com/blacklanternsecurity/public-dns-servers/master/nameservers.txt" ) @@ -97,17 +101,33 @@ async def setup(self): cache_hrs=24 * 7, ) self.devops_mutations = list(self.helpers.word_cloud.devops_mutations) - self._mutation_run = 1 + self.mutation_run = 1 + + self.resolve_and_emit_queue = asyncio.Queue() + self.resolve_and_emit_task = asyncio.create_task(self.resolve_and_emit()) return await super().setup() async def filter_event(self, event): query = self.make_query(event) eligible, reason = await self.eligible_for_enumeration(event) + + # limit brute force depth + subdomain_depth = self.helpers.subdomain_depth(query) + 1 + if subdomain_depth > self.max_depth: + eligible = False + reason = f"subdomain depth of *.{query} ({subdomain_depth}) > max_depth ({self.max_depth})" + + # don't brute-force things that look like autogenerated PTRs + if self.helpers.is_ptr(query): + eligible = False + reason = f'"{query}" looks like an autogenerated PTR' + if eligible: self.add_found(event) # reject if already processed if self.already_processed(query): return False, f'Query "{query}" was already processed' + if eligible: self.processed.add(hash(query)) return True, reason @@ -116,23 +136,18 @@ async def filter_event(self, event): async def handle_event(self, event): query = self.make_query(event) self.source_events.add_target(event) - self.info(f"Brute-forcing subdomains for {query} (source: {event.data})") - for hostname in await self.massdns(query, self.subdomain_list): - await self.emit_result(hostname, event, query) + results = await self.massdns(query, self.subdomain_list) + await self.resolve_and_emit_queue.put((results, event, None)) def abort_if(self, event): if not event.scope_distance == 0: return True, "event is not in scope" if "wildcard" in event.tags: return True, "event is a wildcard" - - async def emit_result(self, result, source_event, query, tags=None): - if not result == source_event: - kwargs = {"abort_if": self.abort_if} - if tags is not None: - kwargs["tags"] = tags - await self.emit_event(result, "DNS_NAME", source_event, **kwargs) + if "unresolved" in event.tags: + return True, "event is unresolved" + return False, "" def already_processed(self, hostname): if hash(hostname) in self.processed: @@ -143,7 +158,7 @@ async def massdns(self, domain, subdomains): subdomains = list(subdomains) domain_wildcard_rdtypes = set() - for domain, rdtypes in (await self.helpers.is_wildcard_domain(domain)).items(): + for _domain, rdtypes in (await self.helpers.is_wildcard_domain(domain)).items(): for rdtype, results in rdtypes.items(): if results: domain_wildcard_rdtypes.add(rdtype) @@ -204,12 +219,36 @@ async def massdns(self, domain, subdomains): ) # everything checks out - self.verbose(f"Resolving batch of {len(results):,} results") - resolved = dict([l async for l in self.helpers.resolve_batch(results, type=("A", "CNAME"))]) - resolved = {k: v for k, v in resolved.items() if v} - for hostname in resolved: - self.add_found(hostname) - return list(resolved) + return results + + async def resolve_and_emit(self): + """ + 
When results are found, they are placed into self.resolve_and_emit_queue. + The purpose of this function (which is started as a task in the module's setup()) is to consume results from + the queue, resolve them, and if they resolve, emit them. + + This exists to prevent disrupting the scan with huge batches of DNS resolutions. + """ + while 1: + results, source_event, tags = await self.resolve_and_emit_queue.get() + self.verbose(f"Resolving batch of {len(results):,} results") + async with self._task_counter.count(f"{self.name}.resolve_and_emit()"): + async for hostname, r in self.helpers.resolve_batch(results, type=("A", "CNAME")): + if not r: + self.debug(f"Discarding {hostname} because it didn't resolve") + continue + self.add_found(hostname) + if source_event is None: + source_event = self.source_events.get(hostname) + if source_event is None: + self.warning(f"Could not correlate source event from: {hostname}") + source_event = self.scan.root_event + kwargs = {"abort_if": self.abort_if, "tags": tags} + await self.emit_event(hostname, "DNS_NAME", source_event, **kwargs) + + @property + def running(self): + return super().running or self.resolve_and_emit_queue.qsize() > 0 async def _canary_check(self, domain, num_checks=50): random_subdomains = list(self.gen_random_subdomains(num_checks)) @@ -339,6 +378,9 @@ def add_mutation(_domain_hash, m): self.mutations_tried.add(h) mutations.add(m) + num_base_mutations = len(base_mutations) + self.debug(f"Base mutations for {domain}: {num_base_mutations:,}") + # try every subdomain everywhere else for _domain, _subdomains in found: if _domain == domain: @@ -346,10 +388,7 @@ def add_mutation(_domain_hash, m): for s in _subdomains: first_segment = s.split(".")[0] # skip stuff with lots of numbers (e.g. PTRs) - digits = self.digit_regex.findall(first_segment) - excessive_digits = len(digits) > 2 - long_digits = any(len(d) > 3 for d in digits) - if excessive_digits or long_digits: + if self.has_excessive_digits(first_segment): continue add_mutation(domain_hash, first_segment) for word in self.helpers.extract_words( @@ -357,6 +396,9 @@ def add_mutation(_domain_hash, m): ): add_mutation(domain_hash, word) + num_massdns_mutations = len(mutations) - num_base_mutations + self.debug(f"Mutations from previous subdomains for {domain}: {num_massdns_mutations:,}") + # numbers + devops mutations for mutation in self.helpers.word_cloud.mutations( subdomains, cloud=False, numbers=3, number_padding=1 @@ -365,24 +407,26 @@ def add_mutation(_domain_hash, m): m = delimiter.join(mutation).lower() add_mutation(domain_hash, m) + num_word_cloud_mutations = len(mutations) - num_massdns_mutations + self.debug(f"Mutations added by word cloud for {domain}: {num_word_cloud_mutations:,}") + # special dns mutator + self.debug( + f"DNS Mutator size: {len(self.helpers.word_cloud.dns_mutator):,} (limited to {self.max_mutations:,})" + ) for subdomain in self.helpers.word_cloud.dns_mutator.mutations( subdomains, max_mutations=self.max_mutations ): add_mutation(domain_hash, subdomain) + num_mutations = len(mutations) - num_word_cloud_mutations + self.debug(f"Mutations added by DNS Mutator: {num_mutations:,}") + if mutations: self.info(f"Trying {len(mutations):,} mutations against {domain} ({i+1}/{len(found)})") results = list(await self.massdns(query, mutations)) - for hostname in results: - source_event = self.source_events.get(hostname) - if source_event is None: - self.warning(f"Could not correlate source event from: {hostname}") - source_event = self.scan.root_event - await 
self.emit_result( - hostname, source_event, query, tags=[f"mutation-{self._mutation_run}"] - ) if results: + await self.resolve_and_emit_queue.put((results, None, [f"mutation-{self.mutation_run}"])) found_mutations = True continue break @@ -390,7 +434,7 @@ def add_mutation(_domain_hash, m): self.warning(e) if found_mutations: - self._mutation_run += 1 + self.mutation_run += 1 def add_found(self, host): if not isinstance(host, str): @@ -422,3 +466,16 @@ def gen_random_subdomains(self, n=50): yield subdomain for _ in range(5): yield self.helpers.rand_string(length=8, digits=False) + + def has_excessive_digits(self, d): + """ + Identifies dns names with excessive numbers, e.g.: + - w1-2-3.evilcorp.com + - ptr1234.evilcorp.com + """ + digits = self.digit_regex.findall(d) + excessive_digits = len(digits) > 2 + long_digits = any(len(d) > 3 for d in digits) + if excessive_digits or long_digits: + return True + return False diff --git a/bbot/modules/newsletters.py b/bbot/modules/newsletters.py index 62ef98463..a59cc30e3 100644 --- a/bbot/modules/newsletters.py +++ b/bbot/modules/newsletters.py @@ -2,12 +2,10 @@ # thanks to BBOT's sub-domain enumeration) looking for the presence of an 'email type' that also # contains a 'placeholder'. The combination of these two HTML items usually signify the presence # of an "Enter Your Email Here" type Newsletter Subscription service. This module could be used -# to find newsletters for a future email bombing attack and/or find user-input fields that could -# be be susceptible to overflows or injections. +# to find newsletters for a future email bombing attack. from .base import BaseModule import re -from bs4 import BeautifulSoup # Known Websites with Newsletters # https://futureparty.com/ @@ -16,8 +14,6 @@ # https://www.milkkarten.net/ # https://geekout.mattnavarra.com/ -deps_pip = ["beautifulsoup4"] - class newsletters(BaseModule): watched_events = ["HTTP_RESPONSE"] @@ -37,7 +33,7 @@ def find_type(self, soup): async def handle_event(self, event): if event.data["status_code"] == 200: - soup = BeautifulSoup(event.data["body"], "html.parser") + soup = self.helpers.beautifulsoup(event.data["body"], "html.parser") result = self.find_type(soup) if result: description = f"Found a Newsletter Submission Form that could be used for email bombing attacks" diff --git a/bbot/modules/output/emails.py b/bbot/modules/output/emails.py index 029bc5aca..e96c5d97c 100644 --- a/bbot/modules/output/emails.py +++ b/bbot/modules/output/emails.py @@ -12,14 +12,19 @@ class Emails(Human): output_filename = "emails.txt" + async def setup(self): + self.emails_written = 0 + return await super().setup() + def _scope_distance_check(self, event): return BaseModule._scope_distance_check(self, event) async def handle_event(self, event): if self.file is not None: + self.emails_written += 1 self.file.write(f"{event.data}\n") self.file.flush() async def report(self): if getattr(self, "_file", None) is not None: - self.info(f"Saved email addresses to {self.output_file}") + self.info(f"Saved {self.emails_written:,} email addresses to {self.output_file}") diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py index 10ca1c8df..014610736 100644 --- a/bbot/modules/output/http.py +++ b/bbot/modules/output/http.py @@ -13,6 +13,7 @@ class HTTP(BaseOutputModule): "username": "", "password": "", "timeout": 10, + "siem_friendly": False, } options_desc = { "url": "Web URL", @@ -21,12 +22,14 @@ class HTTP(BaseOutputModule): "username": "Username (basic auth)", "password": "Password (basic 
auth)", "timeout": "HTTP timeout", + "siem_friendly": "Format JSON in a SIEM-friendly way for ingestion into Elastic, Splunk, etc.", } async def setup(self): self.url = self.config.get("url", "") self.method = self.config.get("method", "POST") self.timeout = self.config.get("timeout", 10) + self.siem_friendly = self.config.get("siem_friendly", False) self.headers = {} bearer = self.config.get("bearer", "") if bearer: @@ -52,7 +55,7 @@ async def handle_event(self, event): method=self.method, auth=self.auth, headers=self.headers, - json=dict(event), + json=event.json(siem_friendly=self.siem_friendly), raise_error=True, ) break diff --git a/bbot/modules/output/json.py b/bbot/modules/output/json.py index a380ac9a1..bf8517db9 100644 --- a/bbot/modules/output/json.py +++ b/bbot/modules/output/json.py @@ -21,9 +21,7 @@ async def setup(self): return True async def handle_event(self, event): - event_json = dict(event) - if self.siem_friendly: - event_json["data"] = {event.type: event_json.pop("data", "")} + event_json = event.json(siem_friendly=self.siem_friendly) event_str = json.dumps(event_json) if self.file is not None: self.file.write(event_str + "\n") diff --git a/bbot/modules/output/splunk.py b/bbot/modules/output/splunk.py new file mode 100644 index 000000000..242f1759e --- /dev/null +++ b/bbot/modules/output/splunk.py @@ -0,0 +1,59 @@ +from bbot.core.errors import RequestError + +from bbot.modules.output.base import BaseOutputModule + + +class Splunk(BaseOutputModule): + watched_events = ["*"] + meta = {"description": "Send every event to a splunk instance through HTTP Event Collector"} + options = { + "url": "", + "hectoken": "", + "index": "", + "source": "", + "timeout": 10, + } + options_desc = { + "url": "Web URL", + "hectoken": "HEC Token", + "index": "Index to send data to", + "source": "Source path to be added to the metadata", + "timeout": "HTTP timeout", + } + + async def setup(self): + self.url = self.config.get("url", "") + self.source = self.config.get("source", "bbot") + self.index = self.config.get("index", "main") + self.timeout = self.config.get("timeout", 10) + self.headers = {} + + hectoken = self.config.get("hectoken", "") + if hectoken: + self.headers["Authorization"] = f"Splunk {hectoken}" + if not self.url: + return False, "Must set URL" + if not self.source: + self.warning("Please provide a source") + return True + + async def handle_event(self, event): + while 1: + try: + data = { + "index": self.index, + "source": self.source, + "sourcetype": "_json", + "event": event.json(), + } + await self.helpers.request( + url=self.url, + method="POST", + headers=self.headers, + json=data, + raise_error=True, + ) + break + except RequestError as e: + self.warning(f"Error sending {event}: {e}, retrying...") + await self.helpers.sleep(1) diff --git a/bbot/modules/output/subdomains.py b/bbot/modules/output/subdomains.py index 49dea2db8..bfb7174ac 100644 --- a/bbot/modules/output/subdomains.py +++ b/bbot/modules/output/subdomains.py @@ -15,6 +15,7 @@ class Subdomains(Human): async def setup(self): self.include_unresolved = self.config.get("include_unresolved", False) + self.subdomains_written = 0 return await super().setup() async def filter_event(self, event): @@ -27,9 +28,10 @@ def _scope_distance_check(self, event): async def handle_event(self, event): if self.file is not None: + self.subdomains_written += 1 self.file.write(f"{event.data}\n") self.file.flush() async def report(self): if getattr(self, "_file", None) is not None: - self.info(f"Saved subdomains to 
{self.output_file}") + self.info(f"Saved {self.subdomains_written:,} subdomains to {self.output_file}") diff --git a/bbot/modules/sitedossier.py b/bbot/modules/sitedossier.py index 0c797296a..86872c052 100644 --- a/bbot/modules/sitedossier.py +++ b/bbot/modules/sitedossier.py @@ -43,5 +43,5 @@ async def query(self, query, parse_fn=None, request_fn=None): results.add(hostname) yield hostname if ' 50 update_md_files("BBOT MODULES", bbot_module_table) + # BBOT output modules + bbot_output_module_table = module_loader.modules_table(mod_type="output") + assert len(bbot_output_module_table.splitlines()) > 10 + update_md_files("BBOT OUTPUT MODULES", bbot_output_module_table) + # BBOT module options bbot_module_options_table = module_loader.modules_options_table() assert len(bbot_module_options_table.splitlines()) > 100 diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index ccbf17c1f..cadde29ad 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -385,6 +385,19 @@ async def test_events(events, scan, helpers, bbot_config): assert reconstituted_event.type == "DNS_NAME" assert "127.0.0.1" in reconstituted_event.resolved_hosts + # SIEM-friendly serialize/deserialize + json_event_siemfriendly = db_event.json(siem_friendly=True) + assert json_event_siemfriendly["scope_distance"] == 1 + assert json_event_siemfriendly["data"] == {"DNS_NAME": "evilcorp.com"} + assert json_event_siemfriendly["type"] == "DNS_NAME" + assert json_event_siemfriendly["timestamp"] == timestamp + reconstituted_event2 = event_from_json(json_event_siemfriendly, siem_friendly=True) + assert reconstituted_event2.scope_distance == 1 + assert reconstituted_event2.timestamp.timestamp() == timestamp + assert reconstituted_event2.data == "evilcorp.com" + assert reconstituted_event2.type == "DNS_NAME" + assert "127.0.0.1" in reconstituted_event2.resolved_hosts + http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", source=scan.root_event) assert http_response.source_id == scan.root_event.id assert http_response.data["input"] == "http://example.com:80" diff --git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py index b972c8561..c8045e595 100644 --- a/bbot/test/test_step_1/test_helpers.py +++ b/bbot/test/test_step_1/test_helpers.py @@ -52,8 +52,14 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https ### MISC ### assert helpers.is_domain("evilcorp.co.uk") assert not helpers.is_domain("www.evilcorp.co.uk") + assert helpers.is_domain("evilcorp.notreal") + assert not helpers.is_domain("asdf.evilcorp.notreal") + assert not helpers.is_domain("notreal") assert helpers.is_subdomain("www.evilcorp.co.uk") assert not helpers.is_subdomain("evilcorp.co.uk") + assert helpers.is_subdomain("www.evilcorp.notreal") + assert not helpers.is_subdomain("evilcorp.notreal") + assert not helpers.is_subdomain("notreal") assert helpers.is_url("http://evilcorp.co.uk/asdf?a=b&c=d#asdf") assert helpers.is_url("https://evilcorp.co.uk/asdf?a=b&c=d#asdf") assert helpers.is_uri("ftp://evilcorp.co.uk") == True @@ -67,6 +73,9 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https assert helpers.parent_domain("www.evilcorp.co.uk") == "evilcorp.co.uk" assert helpers.parent_domain("evilcorp.co.uk") == "evilcorp.co.uk" assert helpers.parent_domain("localhost") == "localhost" + assert helpers.parent_domain("www.evilcorp.notreal") == "evilcorp.notreal" + assert helpers.parent_domain("evilcorp.notreal") 
== "evilcorp.notreal" + assert helpers.parent_domain("notreal") == "notreal" assert list(helpers.domain_parents("test.www.evilcorp.co.uk")) == ["www.evilcorp.co.uk", "evilcorp.co.uk"] assert list(helpers.domain_parents("www.evilcorp.co.uk", include_self=True)) == [ "www.evilcorp.co.uk", @@ -173,6 +182,12 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https assert helpers.split_domain("192.168.0.1") == ("", "192.168.0.1") assert helpers.split_domain("dead::beef") == ("", "dead::beef") + assert helpers.subdomain_depth("a.s.d.f.evilcorp.co.uk") == 4 + assert helpers.subdomain_depth("a.s.d.f.evilcorp.com") == 4 + assert helpers.subdomain_depth("evilcorp.com") == 0 + assert helpers.subdomain_depth("a.evilcorp.com") == 1 + assert helpers.subdomain_depth("a.s.d.f.evilcorp.notreal") == 4 + assert helpers.split_host_port("https://evilcorp.co.uk") == ("evilcorp.co.uk", 443) assert helpers.split_host_port("http://evilcorp.co.uk:666") == ("evilcorp.co.uk", 666) assert helpers.split_host_port("evilcorp.co.uk:666") == ("evilcorp.co.uk", 666) @@ -441,20 +456,6 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_config, bbot_https assert helpers.cache_get("string", cache_hrs=24 * 7) is None assert helpers.cache_get("string", cache_hrs=24 * 14) == "wat" - cache_dict = helpers.CacheDict(max_size=10) - cache_dict.put("1", 2) - assert cache_dict["1"] == 2 - assert cache_dict.get("1") == 2 - assert len(cache_dict) == 1 - cache_dict["2"] = 3 - assert cache_dict["2"] == 3 - assert cache_dict.get("2") == 3 - assert len(cache_dict) == 2 - for i in range(20): - cache_dict[str(i)] = i + 1 - assert len(cache_dict) == 10 - assert tuple(cache_dict) == tuple(hash(str(x)) for x in range(10, 20)) - test_file = Path(scan.config["home"]) / "testfile.asdf" with open(test_file, "w") as f: for i in range(100): diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index 453625a06..de9ffd72c 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -681,7 +681,7 @@ def custom_setup(scan): assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) assert 0 == len([e for e in events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal"]) - assert len(all_events) == 14 + assert len(all_events) == 13 assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 0]) assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == False and e.scope_distance == 0]) @@ -692,9 +692,8 @@ def custom_setup(scan): assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) assert 1 == len([e for e in all_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "speculate"]) assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME_UNRESOLVED" and e.data 
== "notreal" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) - assert len(all_events_nodups) == 12 + assert len(all_events_nodups) == 11 assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == False and e.scope_distance == 0]) @@ -705,7 +704,6 @@ def custom_setup(scan): assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "speculate"]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 6 @@ -719,7 +717,6 @@ def custom_setup(scan): assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "www.bbottest.notreal:9999"]) assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "bbottest.notreal"]) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) - assert 0 == len([e for e in _graph_output_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal"]) # sslcert with out-of-scope chain events, all_events, all_events_nodups, graph_output_events, graph_output_batch_events = await do_scan( @@ -739,9 +736,8 @@ def custom_setup(scan): assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) assert 0 == len([e for e in events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) - assert 0 == len([e for e in events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal"]) - assert len(all_events) == 12 + assert len(all_events) == 11 assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 1]) assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 1]) @@ -750,9 +746,8 @@ def custom_setup(scan): assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == 
True and e.scope_distance == 2 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) - assert len(all_events_nodups) == 10 + assert len(all_events_nodups) == 9 assert 1 == len([e for e in all_events_nodups if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == True and e.scope_distance == 1]) @@ -761,7 +756,6 @@ def custom_setup(scan): assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal" and e.internal == True and e.scope_distance == 2 and str(e.module) == "sslcert"]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999" and e.internal == True and e.scope_distance == 0 and str(e.module) == "speculate"]) - assert 1 == len([e for e in all_events_nodups if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal" and e.internal == True and e.scope_distance == 1 and str(e.module) == "speculate"]) for _graph_output_events in (graph_output_events, graph_output_batch_events): assert len(_graph_output_events) == 5 @@ -773,7 +767,6 @@ def custom_setup(scan): assert 1 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "test.notreal" and e.internal == False and e.scope_distance == 0 and str(e.module) == "sslcert"]) assert 0 == len([e for e in graph_output_events if e.type == "DNS_NAME" and e.data == "www.bbottest.notreal"]) assert 0 == len([e for e in graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "test.notreal:9999"]) - assert 0 == len([e for e in graph_output_events if e.type == "DNS_NAME_UNRESOLVED" and e.data == "notreal"]) @pytest.mark.asyncio diff --git a/bbot/test/test_step_1/test_web.py b/bbot/test/test_step_1/test_web.py index 13edaf725..675197265 100644 --- a/bbot/test/test_step_1/test_web.py +++ b/bbot/test/test_step_1/test_web.py @@ -24,7 +24,7 @@ async def test_web_helpers(bbot_scanner, bbot_config, bbot_httpserver): # should fail because URL is not in-scope assert response.status_code == 500 response = await scan2.helpers.request(url) - # should suceed because URL is in-scope + # should succeed because URL is in-scope assert response.status_code == 200 assert response.text == "test_http_helpers_yep" @@ -45,6 +45,30 @@ async def test_web_helpers(bbot_scanner, bbot_config, bbot_httpserver): assert filename2.is_file() with open(filename2) as f: assert f.read() == download_content + + # beautifulsoup + download_content = """ +
+    <div>
+        <h1>Example Domain</h1>
+        <p>This domain is for use in illustrative examples in documents. You may use this
+        domain in literature without prior coordination or asking for permission.</p>
+        <p><a href="https://www.iana.org/domains/example">More information...</a></p>
+    </div>
+ """ + + path = "/test_http_helpers_beautifulsoup" + url = bbot_httpserver.url_for(path) + bbot_httpserver.expect_request(uri=path).respond_with_data(download_content, status=200) + webpage = await scan1.helpers.request(url) + assert webpage, f"Webpage is False" + soup = scan1.helpers.beautifulsoup(webpage, "html.parser") + assert soup, f"Soup is False" + # pretty_print = soup.prettify() + # assert pretty_print, f"PrettyPrint is False" + # scan1.helpers.log.info(f"{pretty_print}") + html_text = soup.find(text="Example Domain") + assert html_text, f"Find HTML Text is False" + # 404 path = "/test_http_helpers_download_404" url = bbot_httpserver.url_for(path) diff --git a/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py b/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py index 3d59653eb..cbbec11ea 100644 --- a/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py +++ b/bbot/test/test_step_2/module_tests/test_module_ffuf_shortnames.py @@ -177,9 +177,9 @@ def check(self, module_test, events): basic_detection = False directory_detection = False prefix_detection = False - delimeter_detection = False - directory_delimeter_detection = False - prefix_delimeter_detection = False + delimiter_detection = False + directory_delimiter_detection = False + prefix_delimiter_detection = False short_extensions_detection = False for e in events: @@ -191,18 +191,18 @@ def check(self, module_test, events): if e.data == "http://127.0.0.1:8888/adm_portal.aspx": prefix_detection = True if e.data == "http://127.0.0.1:8888/abcconsole.aspx": - delimeter_detection = True + delimiter_detection = True if e.data == "http://127.0.0.1:8888/abcconsole.aspx": - directory_delimeter_detection = True + directory_delimiter_detection = True if e.data == "http://127.0.0.1:8888/xyzdirectory/": - prefix_delimeter_detection = True + prefix_delimiter_detection = True if e.data == "http://127.0.0.1:8888/short.pl": short_extensions_detection = True assert basic_detection assert directory_detection assert prefix_detection - assert delimeter_detection - assert directory_delimeter_detection - assert prefix_delimeter_detection + assert delimiter_detection + assert directory_delimiter_detection + assert prefix_delimiter_detection assert short_extensions_detection diff --git a/bbot/test/test_step_2/module_tests/test_module_http.py b/bbot/test/test_step_2/module_tests/test_module_http.py index 3b4e819b9..d0afcefb2 100644 --- a/bbot/test/test_step_2/module_tests/test_module_http.py +++ b/bbot/test/test_step_2/module_tests/test_module_http.py @@ -1,3 +1,6 @@ +import json +import httpx + from .base import ModuleTestBase @@ -15,10 +18,46 @@ class TestHTTP(ModuleTestBase): } } + def verify_data(self, j): + return j["data"] == "blacklanternsecurity.com" and j["type"] == "DNS_NAME" + async def setup_after_prep(self, module_test): + self.got_event = False + self.headers_correct = False + self.method_correct = False + self.url_correct = False + + async def custom_callback(request): + j = json.loads(request.content) + if request.url == self.downstream_url: + self.url_correct = True + if request.method == "PUT": + self.method_correct = True + if "Authorization" in request.headers: + self.headers_correct = True + if self.verify_data(j): + self.got_event = True + return httpx.Response( + status_code=200, + ) + + module_test.httpx_mock.add_callback(custom_callback) + module_test.httpx_mock.add_callback(custom_callback) module_test.httpx_mock.add_response( method="PUT", headers={"Authorization": "bearer 
auth_token"}, url=self.downstream_url ) def check(self, module_test, events): - pass + assert self.got_event == True + assert self.headers_correct == True + assert self.method_correct == True + assert self.url_correct == True + + +class TestHTTPSIEMFriendly(TestHTTP): + modules_overrides = ["http"] + config_overrides = {"output_modules": {"http": dict(TestHTTP.config_overrides["output_modules"]["http"])}} + config_overrides["output_modules"]["http"]["siem_friendly"] = True + + def verify_data(self, j): + return j["data"] == {"DNS_NAME": "blacklanternsecurity.com"} and j["type"] == "DNS_NAME" diff --git a/bbot/test/test_step_2/module_tests/test_module_splunk.py b/bbot/test/test_step_2/module_tests/test_module_splunk.py new file mode 100644 index 000000000..67d67a4ef --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_splunk.py @@ -0,0 +1,58 @@ +import json +import httpx + +from .base import ModuleTestBase + + +class TestSplunk(ModuleTestBase): + downstream_url = "https://splunk.blacklanternsecurity.fakedomain:1234/services/collector" + config_overrides = { + "output_modules": { + "splunk": { + "url": downstream_url, + "hectoken": "HECTOKEN", + "index": "bbot_index", + "source": "bbot_source", + } + } + } + + def verify_data(self, j): + if not j["source"] == "bbot_source": + return False + if not j["index"] == "bbot_index": + return False + data = j["event"] + if not data["data"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": + return False + return True + + async def setup_after_prep(self, module_test): + self.url_correct = False + self.method_correct = False + self.got_event = False + self.headers_correct = False + + async def custom_callback(request): + j = json.loads(request.content) + if request.url == self.downstream_url: + self.url_correct = True + if request.method == "POST": + self.method_correct = True + if "Authorization" in request.headers: + self.headers_correct = True + if self.verify_data(j): + self.got_event = True + return httpx.Response( + status_code=200, + ) + + module_test.httpx_mock.add_callback(custom_callback) + module_test.httpx_mock.add_callback(custom_callback) + module_test.httpx_mock.add_response() + + def check(self, module_test, events): + assert self.got_event == True + assert self.headers_correct == True + assert self.method_correct == True + assert self.url_correct == True diff --git a/docs/contribution.md b/docs/contribution.md index 2d36cfe44..175c3e7af 100644 --- a/docs/contribution.md +++ b/docs/contribution.md @@ -134,7 +134,6 @@ BBOT automates module dependencies with **Ansible**. If your module relies on a ```python class MyModule(BaseModule): ... 
- deps_pip = ["beautifulsoup4"] deps_apt = ["chromium-browser"] deps_ansible = [ { diff --git a/docs/modules/list_of_modules.md b/docs/modules/list_of_modules.md index a3ffc76c6..ebf4f182f 100644 --- a/docs/modules/list_of_modules.md +++ b/docs/modules/list_of_modules.md @@ -107,6 +107,7 @@ | neo4j | output | No | Output to Neo4j | | * | | | python | output | No | Output via Python API | | * | | | slack | output | No | Message a Slack channel when certain events are encountered | | * | | +| splunk | output | No | Send every event to a splunk instance through HTTP Event Collector | | * | | | subdomains | output | No | Output only resolved, in-scope subdomains | subdomain-enum | DNS_NAME, DNS_NAME_UNRESOLVED | | | teams | output | No | Message a Teams channel when certain events are encountered | | * | | | web_report | output | No | Create a markdown report with web assets | | FINDING, TECHNOLOGY, URL, VHOST, VULNERABILITY | | diff --git a/docs/scanning/advanced.md b/docs/scanning/advanced.md index 0baaf35c8..8207b7ce7 100644 --- a/docs/scanning/advanced.md +++ b/docs/scanning/advanced.md @@ -33,16 +33,10 @@ asyncio.run(main()) ```text -usage: bbot [-h] [--help-all] [-t TARGET [TARGET ...]] - [-w WHITELIST [WHITELIST ...]] [-b BLACKLIST [BLACKLIST ...]] - [--strict-scope] [-m MODULE [MODULE ...]] [-l] - [-em MODULE [MODULE ...]] [-f FLAG [FLAG ...]] [-lf] - [-rf FLAG [FLAG ...]] [-ef FLAG [FLAG ...]] - [-om MODULE [MODULE ...]] [--allow-deadly] [-n SCAN_NAME] - [-o DIR] [-c [CONFIG ...]] [-v] [-d] [-s] [--force] [-y] - [--dry-run] [--current-config] - [--no-deps | --force-deps | --retry-deps | --ignore-failed-deps | --install-all-deps] - [-a] [--version] +usage: bbot [-h] [--help-all] [-t TARGET [TARGET ...]] [-w WHITELIST [WHITELIST ...]] [-b BLACKLIST [BLACKLIST ...]] [--strict-scope] [-m MODULE [MODULE ...]] [-l] + [-em MODULE [MODULE ...]] [-f FLAG [FLAG ...]] [-lf] [-rf FLAG [FLAG ...]] [-ef FLAG [FLAG ...]] [-om MODULE [MODULE ...]] [--allow-deadly] [-n SCAN_NAME] [-o DIR] + [-c [CONFIG ...]] [-v] [-d] [-s] [--force] [-y] [--dry-run] [--current-config] [--no-deps | --force-deps | --retry-deps | --ignore-failed-deps | --install-all-deps] [-a] + [--version] Bighuge BLS OSINT Tool @@ -73,7 +67,7 @@ Modules: -ef FLAG [FLAG ...], --exclude-flags FLAG [FLAG ...] Disable modules with these flags. (e.g. -ef aggressive) -om MODULE [MODULE ...], --output-modules MODULE [MODULE ...] - Output module(s). Choices: asset_inventory,csv,discord,emails,http,human,json,neo4j,python,slack,subdomains,teams,web_report,websocket + Output module(s). Choices: asset_inventory,csv,discord,emails,http,human,json,neo4j,python,slack,splunk,subdomains,teams,web_report,websocket --allow-deadly Enable the use of highly aggressive modules Scan: diff --git a/docs/scanning/configuration.md b/docs/scanning/configuration.md index 2ed2d61de..d203f3ec4 100644 --- a/docs/scanning/configuration.md +++ b/docs/scanning/configuration.md @@ -291,6 +291,7 @@ Many modules accept their own configuration options. 
These options have the abil | modules.vhost.lines | int | take only the first N lines from the wordlist when finding directories | 5000 | | modules.vhost.wordlist | str | Wordlist containing subdomains | https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/DNS/subdomains-top1million-5000.txt | | modules.wafw00f.generic_detect | bool | When no specific WAF detections are made, try to perform a generic detect | True | +| modules.anubisdb.limit | int | Limit the number of subdomains returned per query (increasing this may slow the scan due to garbage results from this API) | 1000 | | modules.bevigil.api_key | str | BeVigil OSINT API Key | | | modules.bevigil.urls | bool | Emit URLs in addition to DNS_NAMEs | False | | modules.binaryedge.api_key | str | BinaryEdge API key | | @@ -308,6 +309,8 @@ Many modules accept their own configuration options. These options have the abil | modules.credshed.username | str | Credshed username | | | modules.dehashed.api_key | str | DeHashed API Key | | | modules.dehashed.username | str | Email Address associated with your API key | | +| modules.dnscommonsrv.max_event_handlers | int | How many instances of the module to run concurrently | 10 | +| modules.dnscommonsrv.top | int | How many of the top SRV records to check | 50 | | modules.fullhunt.api_key | str | FullHunt API Key | | | modules.github_codesearch.api_key | str | Github token | | | modules.github_codesearch.limit | int | Limit code search to this many results | 100 | @@ -320,6 +323,7 @@ Many modules accept their own configuration options. These options have the abil | modules.ipneighbor.num_bits | int | Netmask size (in CIDR notation) to check. Default is 4 bits (16 hosts) | 4 | | modules.ipstack.api_key | str | IPStack GeoIP API Key | | | modules.leakix.api_key | str | LeakIX API Key | | +| modules.massdns.max_depth | int | How many subdomains deep to brute force, i.e. 5.4.3.2.1.evilcorp.com | 5 | | modules.massdns.max_mutations | int | Max number of smart mutations per subdomain | 500 | | modules.massdns.max_resolvers | int | Number of concurrent massdns resolvers | 1000 | | modules.massdns.wordlist | str | Subdomain wordlist URL | https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/DNS/subdomains-top1million-5000.txt | @@ -346,6 +350,7 @@ Many modules accept their own configuration options. These options have the abil | output_modules.http.bearer | str | Authorization Bearer token | | | output_modules.http.method | str | HTTP method | POST | | output_modules.http.password | str | Password (basic auth) | | +| output_modules.http.siem_friendly | bool | Format JSON in a SIEM-friendly way for ingestion into Elastic, Splunk, etc. | False | | output_modules.http.timeout | int | HTTP timeout | 10 | | output_modules.http.url | str | Web URL | | | output_modules.http.username | str | Username (basic auth) | | @@ -360,6 +365,11 @@ Many modules accept their own configuration options. 
These options have the abil | output_modules.slack.event_types | list | Types of events to send | ['VULNERABILITY', 'FINDING'] | | output_modules.slack.min_severity | str | Only allow VULNERABILITY events of this severity or higher | LOW | | output_modules.slack.webhook_url | str | Discord webhook URL | | +| output_modules.splunk.hectoken | str | HEC Token | | +| output_modules.splunk.index | str | Index to send data to | | +| output_modules.splunk.source | str | Source path to be added to the metadata | | +| output_modules.splunk.timeout | int | HTTP timeout | 10 | +| output_modules.splunk.url | str | Web URL | | | output_modules.subdomains.include_unresolved | bool | Include unresolved subdomains in output | False | | output_modules.subdomains.output_file | str | Output to file | | | output_modules.teams.event_types | list | Types of events to send | ['VULNERABILITY', 'FINDING'] | diff --git a/docs/scanning/events.md b/docs/scanning/events.md index 6628fac46..d2aaa4595 100644 --- a/docs/scanning/events.md +++ b/docs/scanning/events.md @@ -51,7 +51,7 @@ Below is a full list of event types along with which modules produce/consume the | Event Type | # Consuming Modules | # Producing Modules | Consuming Modules | Producing Modules | |---------------------|-----------------------|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| * | 11 | 0 | affiliates, csv, discord, http, human, json, neo4j, python, slack, teams, websocket | | +| * | 12 | 0 | affiliates, csv, discord, http, human, json, neo4j, python, slack, splunk, teams, websocket | | | ASN | 0 | 1 | | asn | | AZURE_TENANT | 1 | 0 | speculate | | | CODE_REPOSITORY | 0 | 2 | | github_codesearch, github_org | diff --git a/docs/scanning/output.md b/docs/scanning/output.md index 81b4b8ede..af1db4737 100644 --- a/docs/scanning/output.md +++ b/docs/scanning/output.md @@ -1,6 +1,6 @@ # Output -By default, BBOT saves its output in TXT, JSON, and CSV formats: +By default, BBOT saves its output in TXT, JSON, and CSV formats. The filenames are logged at the end of each scan: ![bbot output](https://github.com/blacklanternsecurity/bbot/assets/20261699/bb3da441-2682-408f-b955-19b268823b82) Every BBOT scan gets a unique and mildly-entertaining name like **`demonic_jimmy`**. Output for that scan, including scan stats and any web screenshots, etc., are saved to a folder by that name in `~/.bbot/scans`. The most recent 20 scans are kept, and older ones are removed. 
You can change the location of BBOT's output with `--output`, and you can also pick a custom scan name with `--name`. @@ -135,6 +135,25 @@ output_modules: password: P@ssw0rd ``` +### Splunk + +The `splunk` output module sends [events](events.md) in JSON format to a Splunk instance via [HEC](https://docs.splunk.com/Documentation/Splunk/9.2.0/Data/UsetheHTTPEventCollector). + +You can customize this output with the following config options: + +```yaml title="~/.bbot/config/bbot.yml" +output_modules: + splunk: + # The full URL with the URI `/services/collector/event` + url: https://localhost:8088/services/collector/event + # Generated from the Splunk web UI + hectoken: xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + # Defaults to `main` if not set + index: my-specific-index + # Defaults to `bbot` if not set + source: /my/source.json +``` + ### Asset Inventory The `asset_inventory` module produces a CSV like this: diff --git a/poetry.lock b/poetry.lock index 676f41389..ab85c1785 100644 --- a/poetry.lock +++ b/poetry.lock @@ -186,6 +186,17 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "cachetools" +version = "5.3.2" +description = "Extensible memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +files = [ + {file = "cachetools-5.3.2-py3-none-any.whl", hash = "sha256:861f35a13a451f94e301ce2bec7cac63e881232ccce7ed67fab9b5df4d3beaa1"}, + {file = "cachetools-5.3.2.tar.gz", hash = "sha256:086ee420196f7b2ab9ca2db2520aca326318b68fe5ba8bc4d49cca91add450f2"}, +] + [[package]] name = "certifi" version = "2024.2.2" @@ -2027,7 +2038,6 @@ optional = false python-versions = "*" files = [ {file = "requests-file-2.0.0.tar.gz", hash = "sha256:20c5931629c558fda566cacc10cfe2cd502433e628f568c34c80d96a0cc95972"}, - {file = "requests_file-2.0.0-py2.py3-none-any.whl", hash = "sha256:3e493d390adb44aa102ebea827a48717336d5268968c370eaf5cae13bf"}, ] [package.dependencies] @@ -2088,6 +2098,17 @@ files = [ {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, ] +[[package]] +name = "socksio" +version = "1.0.0" +description = "Sans-I/O implementation of SOCKS4, SOCKS4A, and SOCKS5." +optional = false +python-versions = ">=3.6" +files = [ + {file = "socksio-1.0.0-py3-none-any.whl", hash = "sha256:95dc1f15f9b34e8d7b16f06d74b8ccf48f609af32ab33c608d08761c5dcbb1f3"}, + {file = "socksio-1.0.0.tar.gz", hash = "sha256:f88beb3da5b5c38b9890469de67d0cb0f9d494b78b106ca1845f96c10b91c4ac"}, +] + [[package]] name = "soupsieve" version = "2.5" @@ -2413,4 +2434,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "8d9864610f54050aec62bf75415e5b683a851323d054a38ff36e54d9d5c284e3" +content-hash = "e9c476ba44a5968f7bd6c9759ac4c6f8e679384bd6b0dd4f128af873a68a34da" diff --git a/pyproject.toml b/pyproject.toml index 93172c060..f16540fb8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,8 @@ pydantic = "^2.4.2" httpx = "^0.26.0" cloudcheck = "^2.1.0.181" tldextract = "^5.1.1" +cachetools = "^5.3.2" +socksio = "^1.0.0" [tool.poetry.group.dev.dependencies] flake8 = "^6.0.0"
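Taken together, the changes above introduce two JSON shapes worth noting: the flat event JSON that the new `splunk` module wraps in a HEC envelope, and the nested `siem_friendly` variant shared by the `json` and `http` output modules. Below is a minimal sketch of both, using plain dicts in place of BBOT's `Event` class; the `event_json` and `hec_payload` names are illustrative, not part of BBOT's API.

```python
# Sketch only: plain dicts standing in for BBOT's Event class.

# A flat serialized event, as produced by event.json()
event_json = {
    "type": "DNS_NAME",
    "data": "evilcorp.com",
    "scope_distance": 1,
}

# siem_friendly=True nests "data" under the event type (see the json output
# module above), so every event shares one top-level schema:
siem_friendly = dict(event_json)
siem_friendly["data"] = {event_json["type"]: siem_friendly.pop("data", "")}
assert siem_friendly["data"] == {"DNS_NAME": "evilcorp.com"}

# The splunk module wraps each flat event in a HEC envelope (see its
# handle_event above); "main" and "bbot" are the defaults when the index
# and source options are unset:
hec_payload = {
    "index": "main",
    "source": "bbot",
    "sourcetype": "_json",
    "event": event_json,
}
```

Nesting `data` under the event type means a SIEM can map `data.DNS_NAME`, `data.URL`, and so on to distinct fields instead of one polymorphic `data` field, which is why the same option now appears in both the `json` and `http` modules.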