From b037471586ec026a3d643354b9dad9b3efd89cd1 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 23 Apr 2024 12:24:56 -0400 Subject: [PATCH 01/24] remove resolved/unresolved tags as they are redundant --- bbot/modules/anubisdb.py | 2 +- bbot/modules/internal/dns.py | 4 +--- bbot/test/test_step_1/test_dns.py | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/bbot/modules/anubisdb.py b/bbot/modules/anubisdb.py index 9864e3c6d8..bf4c88e935 100644 --- a/bbot/modules/anubisdb.py +++ b/bbot/modules/anubisdb.py @@ -30,7 +30,7 @@ def abort_if_pre(self, hostname): async def abort_if(self, event): # abort if dns name is unresolved - if not "resolved" in event.tags: + if event.type == "DNS_NAME_UNRESOLVED": return True, "DNS name is unresolved" return await super().abort_if(event) diff --git a/bbot/modules/internal/dns.py b/bbot/modules/internal/dns.py index ea5e4efcf1..b96b9b19c7 100644 --- a/bbot/modules/internal/dns.py +++ b/bbot/modules/internal/dns.py @@ -94,9 +94,7 @@ async def handle_event(self, event, kwargs): if rdtype not in dns_children: dns_tags.add(f"{rdtype.lower()}-error") - if dns_children: - dns_tags.add("resolved") - elif not event_is_ip: + if not dns_children and not event_is_ip: dns_tags.add("unresolved") for rdtype, children in dns_children.items(): diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index 05796e4645..aa2a279070 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -238,7 +238,6 @@ async def test_wildcards(bbot_scanner): "a-record", "target", "aaaa-wildcard", - "resolved", "in-scope", "subdomain", "aaaa-record", @@ -260,7 +259,7 @@ async def test_wildcards(bbot_scanner): for e in events if e.type == "DNS_NAME" and e.data == "asdfl.gashdgkjsadgsdf.github.io" - and all(t in e.tags for t in ("a-record", "target", "resolved", "in-scope", "subdomain", "aaaa-record")) + and all(t in e.tags for t in ("a-record", "target", "in-scope", "subdomain", "aaaa-record")) and not any(t in e.tags for t in ("wildcard", "a-wildcard", "aaaa-wildcard")) ] ) From 37a5889e7f3c97b4d5829f9dcfe0e7a3d6bc15fa Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Fri, 19 Apr 2024 13:52:50 -0400 Subject: [PATCH 02/24] tests for custom target types --- bbot/test/test_step_1/test_cli.py | 8 ++++++++ bbot/test/test_step_1/test_python_api.py | 5 +++++ 2 files changed, 13 insertions(+) diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index db2a5316d4..d879d863d1 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -190,6 +190,14 @@ async def test_cli_args(monkeypatch, caplog, clean_default_config): assert "| active" in caplog.text assert not "| passive" in caplog.text + # custom target type + caplog.clear() + assert not caplog.text + monkeypatch.setattr("sys.argv", ["bbot", "-t", "ORG:evilcorp"]) + result = await cli._main() + assert result == True + assert "[ORG_STUB] evilcorp TARGET" in caplog.text + # no args caplog.clear() assert not caplog.text diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py index 14c2fae1d4..0155dcfb38 100644 --- a/bbot/test/test_step_1/test_python_api.py +++ b/bbot/test/test_step_1/test_python_api.py @@ -45,6 +45,11 @@ async def test_python_api(): Scanner("127.0.0.1", config={"home": bbot_home}) assert os.environ["BBOT_TOOLS"] == str(Path(bbot_home) / "tools") + # custom target types + custom_target_scan = Scanner("ORG:evilcorp") + events = [e async for e in custom_target_scan.async_start()] + assert 1 == len([e for e in events if e.type == "ORG_STUB" and e.data == "evilcorp" and "target" in e.tags]) + def test_python_api_sync(): from bbot.scanner import Scanner From b06355bc1cbbd8714786d11b08f66b59b11d583d Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 23 Apr 2024 12:09:10 -0400 Subject: [PATCH 03/24] fix small cli bug and add tests for it --- bbot/scanner/preset/preset.py | 6 ++++-- bbot/test/test_step_1/test_cli.py | 7 +++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index 60dac85dd9..57eb11a1a1 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -381,8 +381,10 @@ def bake(self): for flag in baked_preset.flags: for module, preloaded in baked_preset.module_loader.preloaded().items(): module_flags = preloaded.get("flags", []) + module_type = preloaded.get("type", "scan") if flag in module_flags: - baked_preset.add_module(module, raise_error=False) + self.log_debug(f'Enabling module "{module}" because it has flag "{flag}"') + baked_preset.add_module(module, module_type, raise_error=False) # ensure we have output modules if not baked_preset.output_modules: @@ -433,7 +435,7 @@ def internal_modules(self): return [m for m in self.modules if self.preloaded_module(m).get("type", "scan") == "internal"] def add_module(self, module_name, module_type="scan", raise_error=True): - self.log_debug(f'Adding module "{module_name}"') + self.log_debug(f'Adding module "{module_name}" of type "{module_type}"') is_valid, reason, preloaded = self._is_valid_module(module_name, module_type, raise_error=raise_error) if not is_valid: self.log_debug(f'Unable to add {module_type} module "{module_name}": {reason}') diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index d879d863d1..0a34b4faa5 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -198,6 +198,13 @@ async def test_cli_args(monkeypatch, caplog, clean_default_config): assert result == True assert "[ORG_STUB] evilcorp TARGET" in caplog.text + # activate modules by flag + caplog.clear() + assert not caplog.text + monkeypatch.setattr("sys.argv", ["bbot", "-f", "passive"]) + result = await cli._main() + assert result == True + # no args caplog.clear() assert not caplog.text From 84df8298e49f6f7e68723829ddece6a70c43e71e Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 17 Apr 2024 16:38:24 -0400 Subject: [PATCH 04/24] fix attribute error --- bbot/core/helpers/command.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bbot/core/helpers/command.py b/bbot/core/helpers/command.py index 14a788f8ac..06fc8a91f5 100644 --- a/bbot/core/helpers/command.py +++ b/bbot/core/helpers/command.py @@ -185,7 +185,8 @@ async def _write_proc_line(proc, chunk): proc.stdin.write(smart_encode(chunk) + b"\n") await proc.stdin.drain() except Exception as e: - command = " ".join([str(s) for s in proc.args]) + proc_args = [str(s) for s in getattr(proc, "args", [])] + command = " ".join(proc_args) log.warning(f"Error writing line to stdin for command: {command}: {e}") log.trace(traceback.format_exc()) From 355a5bee6ee60b4c664ab25a56e5be0dc1f80e38 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 23 Apr 2024 11:50:21 -0400 Subject: [PATCH 05/24] Better debugging during scan cancellation --- bbot/core/helpers/misc.py | 2 ++ bbot/scanner/scanner.py | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index a4378069d8..206fc50f0f 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -1078,6 +1078,7 @@ def kill_children(parent_pid=None, sig=None): parent = psutil.Process(parent_pid) except psutil.NoSuchProcess: log.debug(f"No such PID: {parent_pid}") + return log.debug(f"Killing children of process ID {parent.pid}") children = parent.children(recursive=True) for child in children: @@ -1089,6 +1090,7 @@ def kill_children(parent_pid=None, sig=None): log.debug(f"No such PID: {child.pid}") except psutil.AccessDenied: log.debug(f"Error killing PID: {child.pid} - access denied") + log.debug(f"Finished killing children of process ID {parent.pid}") def str_or_file(s): diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index e01f85ce24..9f444d3cb5 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -351,7 +351,13 @@ async def async_start(self): self.critical(f"Unexpected error during scan:\n{traceback.format_exc()}") finally: - self._cancel_tasks() + tasks = self._cancel_tasks() + self.debug(f"Awaiting {len(tasks):,} tasks") + for task in tasks: + self.debug(f"Awaiting {task}") + with contextlib.suppress(BaseException): + await task + self.debug(f"Awaited {len(tasks):,} tasks") await self._report() await self._cleanup() @@ -663,13 +669,14 @@ def stop(self): if not self._stopping: self._stopping = True self.status = "ABORTING" - self.hugewarning(f"Aborting scan") + self.hugewarning("Aborting scan") self.trace() self._cancel_tasks() self._drain_queues() self.helpers.kill_children() self._drain_queues() self.helpers.kill_children() + self.debug("Finished aborting scan") async def finish(self): """Finalizes the scan by invoking the `finished()` method on all active modules if new activity is detected. @@ -729,6 +736,7 @@ def _cancel_tasks(self): Returns: None """ + self.debug("Cancelling all scan tasks") tasks = [] # module workers for m in self.modules.values(): @@ -746,6 +754,8 @@ def _cancel_tasks(self): self.helpers.cancel_tasks_sync(tasks) # process pool self.helpers.process_pool.shutdown(cancel_futures=True) + self.debug("Finished cancelling all scan tasks") + return tasks async def _report(self): """Asynchronously executes the `report()` method for each module in the scan. From 616fe2e70ea6f2cf52ea0be4cce2a5b6d50b5d32 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 23 Apr 2024 12:53:57 -0400 Subject: [PATCH 06/24] better engine error handling during scan cancellation --- bbot/core/engine.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/bbot/core/engine.py b/bbot/core/engine.py index c72eecbb32..24781ab3b3 100644 --- a/bbot/core/engine.py +++ b/bbot/core/engine.py @@ -153,19 +153,26 @@ async def run_and_return(self, client_id, command_fn, **kwargs): error = f"Unhandled error in {self.name}.{command_fn.__name__}({kwargs}): {e}" trace = traceback.format_exc() result = {"_e": (error, trace)} - await self.socket.send_multipart([client_id, pickle.dumps(result)]) + await self.send_socket_multipart([client_id, pickle.dumps(result)]) async def run_and_yield(self, client_id, command_fn, **kwargs): self.log.debug(f"{self.name} run-and-yield {command_fn.__name__}({kwargs})") try: async for _ in command_fn(**kwargs): - await self.socket.send_multipart([client_id, pickle.dumps(_)]) - await self.socket.send_multipart([client_id, pickle.dumps({"_s": None})]) + await self.send_socket_multipart([client_id, pickle.dumps(_)]) + await self.send_socket_multipart([client_id, pickle.dumps({"_s": None})]) except Exception as e: error = f"Unhandled error in {self.name}.{command_fn.__name__}({kwargs}): {e}" trace = traceback.format_exc() result = {"_e": (error, trace)} - await self.socket.send_multipart([client_id, pickle.dumps(result)]) + await self.send_socket_multipart([client_id, pickle.dumps(result)]) + + async def send_socket_multipart(self, *args, **kwargs): + try: + await self.socket.send_multipart(*args, **kwargs) + except Exception as e: + self.log.warning(f"Error sending ZMQ message: {e}") + self.log.trace(traceback.format_exc()) async def worker(self): try: From 820c15dd9432e186604ec3211bd2b1b27770d356 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 24 Apr 2024 13:42:30 -0400 Subject: [PATCH 07/24] wip dnsbrute rework --- bbot/core/helpers/dns/dns.py | 11 + bbot/core/helpers/helper.py | 4 +- bbot/modules/dnscommonsrv.py | 32 +- bbot/modules/massdns.py | 417 ------------------ bbot/modules/templates/subdomain_enum.py | 47 +- bbot/scanner/preset/preset.py | 6 +- bbot/scanner/target.py | 6 +- .../module_tests/test_module_dastardly.py | 2 +- .../module_tests/test_module_dnscommonsrv.py | 45 +- .../module_tests/test_module_massdns.py | 10 - 10 files changed, 87 insertions(+), 493 deletions(-) delete mode 100644 bbot/modules/massdns.py delete mode 100644 bbot/test/test_step_2/module_tests/test_module_massdns.py diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 7f775483ca..a4d626e5cf 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -73,6 +73,9 @@ def __init__(self, parent_helper): # TODO: DNS server speed test (start in background task) self.resolver_file = self.parent_helper.tempfile(self.system_resolvers, pipe=False) + # brute force helper + self._brute = None + async def resolve(self, query, **kwargs): return await self.run_and_return("resolve", query=query, **kwargs) @@ -84,6 +87,14 @@ async def resolve_raw_batch(self, queries): async for _ in self.run_and_yield("resolve_raw_batch", queries=queries): yield _ + @property + def brute(self): + if self._brute is None: + from .brute import DNSBrute + + self._brute = DNSBrute(self.parent_helper) + return self._brute + async def is_wildcard(self, query, ips=None, rdtype=None): """ Use this method to check whether a *host* is a wildcard entry diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py index 16afc05cd2..e4dc09326b 100644 --- a/bbot/core/helpers/helper.py +++ b/bbot/core/helpers/helper.py @@ -110,8 +110,8 @@ def clean_old_scans(self): _filter = lambda x: x.is_dir() and self.regexes.scan_name_regex.match(x.name) self.clean_old(self.scans_dir, keep=self.keep_old_scans, filter=_filter) - def make_target(self, *events): - return Target(*events) + def make_target(self, *events, **kwargs): + return Target(*events, **kwargs) @property def config(self): diff --git a/bbot/modules/dnscommonsrv.py b/bbot/modules/dnscommonsrv.py index eef8e2d8ca..ae4f39c88d 100644 --- a/bbot/modules/dnscommonsrv.py +++ b/bbot/modules/dnscommonsrv.py @@ -1,4 +1,4 @@ -from bbot.modules.base import BaseModule +from bbot.modules.templates.subdomain_enum import subdomain_enum # the following are the result of a 1-day internet survey to find the top SRV records # the scan resulted in 36,282 SRV records. the count for each one is shown. @@ -149,33 +149,15 @@ ] -class dnscommonsrv(BaseModule): +class dnscommonsrv(subdomain_enum): watched_events = ["DNS_NAME"] produced_events = ["DNS_NAME"] flags = ["subdomain-enum", "passive", "safe"] meta = {"description": "Check for common SRV records"} - options = {"top": 50, "max_event_handlers": 10} - options_desc = { - "top": "How many of the top SRV records to check", - "max_event_handlers": "How many instances of the module to run concurrently", - } - _max_event_handlers = 10 - - def _incoming_dedup_hash(self, event): - # dedupe by parent - parent_domain = self.helpers.parent_domain(event.data) - return hash(parent_domain), "already processed parent domain" - - async def filter_event(self, event): - # skip SRV wildcards - if "SRV" in await self.helpers.is_wildcard(event.host): - return False - return True async def handle_event(self, event): - top = int(self.config.get("top", 50)) - parent_domain = self.helpers.parent_domain(event.data) - queries = [f"{srv}.{parent_domain}" for srv in common_srvs[:top]] - async for query, results in self.helpers.resolve_batch(queries, type="srv"): - if results: - await self.emit_event(query, "DNS_NAME", tags=["srv-record"], source=event) + self.hugesuccess(event) + query = self.make_query(event) + self.verbose(f'Brute-forcing SRV records for "{query}"') + for hostname in await self.helpers.dns.brute(self, query, common_srvs, type="SRV"): + await self.emit_event(hostname, "DNS_NAME", source=event) diff --git a/bbot/modules/massdns.py b/bbot/modules/massdns.py deleted file mode 100644 index ffacb8c644..0000000000 --- a/bbot/modules/massdns.py +++ /dev/null @@ -1,417 +0,0 @@ -import json -import random -import subprocess -import regex as re - -from bbot.modules.templates.subdomain_enum import subdomain_enum - - -class massdns(subdomain_enum): - """ - This is BBOT's flagship subdomain enumeration module. - - It uses massdns to brute-force subdomains. - At the end of a scan, it will leverage BBOT's word cloud to recursively discover target-specific subdomain mutations. - - Each subdomain discovered via mutations is tagged with the "mutation" tag. This tag indicates the depth at which - the mutation was found. I.e. the first mutation will be tagged "mutation-1". The second one (a mutation of a - mutation) will be "mutation-2". Mutations of mutations of mutations will be "mutation-3", etc. - - This is especially use for bug bounties because it enables you to recognize distant/rare subdomains at a glance. - Subdomains with higher mutation levels are more likely to be distant/rare or never-before-seen. - """ - - flags = ["subdomain-enum", "passive", "aggressive"] - watched_events = ["DNS_NAME"] - produced_events = ["DNS_NAME"] - meta = {"description": "Brute-force subdomains with massdns (highly effective)"} - options = { - "wordlist": "https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/DNS/subdomains-top1million-5000.txt", - "max_resolvers": 1000, - "max_mutations": 500, - "max_depth": 5, - } - options_desc = { - "wordlist": "Subdomain wordlist URL", - "max_resolvers": "Number of concurrent massdns resolvers", - "max_mutations": "Max number of smart mutations per subdomain", - "max_depth": "How many subdomains deep to brute force, i.e. 5.4.3.2.1.evilcorp.com", - } - subdomain_file = None - deps_common = ["massdns"] - reject_wildcards = "strict" - _qsize = 10000 - - digit_regex = re.compile(r"\d+") - - async def setup(self): - self.found = dict() - self.mutations_tried = set() - self.source_events = self.helpers.make_target() - self.subdomain_file = await self.helpers.wordlist(self.config.get("wordlist")) - self.subdomain_list = set(self.helpers.read_file(self.subdomain_file)) - - ms_on_prem_string_file = self.helpers.wordlist_dir / "ms_on_prem_subdomains.txt" - ms_on_prem_strings = set(self.helpers.read_file(ms_on_prem_string_file)) - self.subdomain_list.update(ms_on_prem_strings) - - self.max_resolvers = self.config.get("max_resolvers", 1000) - self.max_mutations = self.config.get("max_mutations", 500) - self.max_depth = max(1, self.config.get("max_depth", 5)) - nameservers_url = ( - "https://raw.githubusercontent.com/blacklanternsecurity/public-dns-servers/master/nameservers.txt" - ) - self.resolver_file = await self.helpers.wordlist( - nameservers_url, - cache_hrs=24 * 7, - ) - self.devops_mutations = list(self.helpers.word_cloud.devops_mutations) - self._mutation_run = 1 - - return await super().setup() - - async def filter_event(self, event): - query = self.make_query(event) - eligible, reason = await self.eligible_for_enumeration(event) - - # limit brute force depth - subdomain_depth = self.helpers.subdomain_depth(query) + 1 - if subdomain_depth > self.max_depth: - eligible = False - reason = f"subdomain depth of *.{query} ({subdomain_depth}) > max_depth ({self.max_depth})" - - # don't brute-force things that look like autogenerated PTRs - if self.helpers.is_ptr(query): - eligible = False - reason = f'"{query}" looks like an autogenerated PTR' - - if eligible: - self.add_found(event) - # reject if already processed - if self.already_processed(query): - return False, f'Query "{query}" was already processed' - - if eligible: - self.processed.add(hash(query)) - return True, reason - return False, reason - - async def handle_event(self, event): - query = self.make_query(event) - self.source_events.add_target(event) - self.info(f"Brute-forcing subdomains for {query} (source: {event.data})") - for hostname in await self.massdns(query, self.subdomain_list): - await self.emit_result(hostname, event, query) - - def abort_if(self, event): - if not event.scope_distance == 0: - return True, "event is not in scope" - if "wildcard" in event.tags: - return True, "event is a wildcard" - if "unresolved" in event.tags: - return True, "event is unresolved" - return False, "" - - async def emit_result(self, result, source_event, query, tags=None): - if not result == source_event: - kwargs = {"abort_if": self.abort_if} - if tags is not None: - kwargs["tags"] = tags - await self.emit_event(result, "DNS_NAME", source_event, **kwargs) - - def already_processed(self, hostname): - if hash(hostname) in self.processed: - return True - return False - - async def massdns(self, domain, subdomains): - subdomains = list(subdomains) - - domain_wildcard_rdtypes = set() - for _domain, rdtypes in (await self.helpers.is_wildcard_domain(domain)).items(): - for rdtype, results in rdtypes.items(): - if results: - domain_wildcard_rdtypes.add(rdtype) - if any([r in domain_wildcard_rdtypes for r in ("A", "CNAME")]): - self.info( - f"Aborting massdns on {domain} because it's a wildcard domain ({','.join(domain_wildcard_rdtypes)})" - ) - self.found.pop(domain, None) - return [] - else: - self.log.trace(f"{domain}: A is not in domain_wildcard_rdtypes:{domain_wildcard_rdtypes}") - - # before we start, do a canary check for wildcards - abort_msg = f"Aborting massdns on {domain} due to false positive" - canary_result = await self._canary_check(domain) - if canary_result: - self.info(abort_msg + f": {canary_result}") - return [] - else: - self.log.trace(f"Canary result for {domain}: {canary_result}") - - results = [] - async for hostname, ip, rdtype in self._massdns(domain, subdomains): - # allow brute-forcing of wildcard domains - # this is dead code but it's kinda cool so it can live here - if rdtype in domain_wildcard_rdtypes: - # skip wildcard checking on multi-level subdomains for performance reasons - stem = hostname.split(domain)[0].strip(".") - if "." in stem: - self.debug(f"Skipping {hostname}:A because it may be a wildcard (reason: performance)") - continue - wildcard_rdtypes = await self.helpers.is_wildcard(hostname, ips=(ip,), rdtype=rdtype) - if rdtype in wildcard_rdtypes: - self.debug(f"Skipping {hostname}:{rdtype} because it's a wildcard") - continue - results.append(hostname) - - # do another canary check for good measure - if len(results) > 50: - canary_result = await self._canary_check(domain) - if canary_result: - self.info(abort_msg + f": {canary_result}") - return [] - else: - self.log.trace(f"Canary result for {domain}: {canary_result}") - - # abort if there are a suspiciously high number of results - # (the results are over 2000, and this is more than 20 percent of the input size) - if len(results) > 2000: - if len(results) / len(subdomains) > 0.2: - self.info( - f"Aborting because the number of results ({len(results):,}) is suspiciously high for the length of the wordlist ({len(subdomains):,})" - ) - return [] - else: - self.info( - f"{len(results):,} results returned from massdns against {domain} (wordlist size = {len(subdomains):,})" - ) - - # everything checks out - return results - - async def _canary_check(self, domain, num_checks=50): - random_subdomains = list(self.gen_random_subdomains(num_checks)) - self.verbose(f"Testing {len(random_subdomains):,} canaries against {domain}") - canary_results = [h async for h, d, r in self._massdns(domain, random_subdomains)] - self.log.trace(f"canary results for {domain}: {canary_results}") - resolved_canaries = self.helpers.resolve_batch(canary_results) - self.log.trace(f"resolved canary results for {domain}: {canary_results}") - async for query, result in resolved_canaries: - if result: - await resolved_canaries.aclose() - result = f"{query}:{result}" - self.log.trace(f"Found false positive: {result}") - return result - self.log.trace(f"Passed canary check for {domain}") - return False - - async def _massdns(self, domain, subdomains): - """ - { - "name": "www.blacklanternsecurity.com.", - "type": "A", - "class": "IN", - "status": "NOERROR", - "data": { - "answers": [ - { - "ttl": 3600, - "type": "CNAME", - "class": "IN", - "name": "www.blacklanternsecurity.com.", - "data": "blacklanternsecurity.github.io." - }, - { - "ttl": 3600, - "type": "A", - "class": "IN", - "name": "blacklanternsecurity.github.io.", - "data": "185.199.108.153" - } - ] - }, - "resolver": "168.215.165.186:53" - } - """ - if self.scan.stopping: - return - - command = ( - "massdns", - "-r", - self.resolver_file, - "-s", - self.max_resolvers, - "-t", - "A", - "-o", - "J", - "-q", - ) - subdomains = self.gen_subdomains(subdomains, domain) - hosts_yielded = set() - async for line in self.run_process_live(command, stderr=subprocess.DEVNULL, input=subdomains): - try: - j = json.loads(line) - except json.decoder.JSONDecodeError: - self.debug(f"Failed to decode line: {line}") - continue - answers = j.get("data", {}).get("answers", []) - if type(answers) == list and len(answers) > 0: - answer = answers[0] - hostname = answer.get("name", "").strip(".").lower() - if hostname.endswith(f".{domain}"): - data = answer.get("data", "") - rdtype = answer.get("type", "").upper() - # avoid garbage answers like this: - # 8AAAA queries have been locally blocked by dnscrypt-proxy/Set block_ipv6 to false to disable this feature - if data and rdtype and not " " in data: - hostname_hash = hash(hostname) - if hostname_hash not in hosts_yielded: - hosts_yielded.add(hostname_hash) - yield hostname, data, rdtype - - async def finish(self): - found = sorted(self.found.items(), key=lambda x: len(x[-1]), reverse=True) - # if we have a lot of rounds to make, don't try mutations on less-populated domains - trimmed_found = [] - if found: - avg_subdomains = sum([len(subdomains) for domain, subdomains in found[:50]]) / len(found[:50]) - for i, (domain, subdomains) in enumerate(found): - # accept domains that are in the top 50 or have more than 5 percent of the average number of subdomains - if i < 50 or (len(subdomains) > 1 and len(subdomains) >= (avg_subdomains * 0.05)): - trimmed_found.append((domain, subdomains)) - else: - self.verbose( - f"Skipping mutations on {domain} because it only has {len(subdomains):,} subdomain(s) (avg: {avg_subdomains:,})" - ) - - base_mutations = set() - found_mutations = False - try: - for i, (domain, subdomains) in enumerate(trimmed_found): - self.verbose(f"{domain} has {len(subdomains):,} subdomains") - # keep looping as long as we're finding things - while 1: - max_mem_percent = 90 - mem_status = self.helpers.memory_status() - # abort if we don't have the memory - mem_percent = mem_status.percent - if mem_percent > max_mem_percent: - free_memory = mem_status.available - free_memory_human = self.helpers.bytes_to_human(free_memory) - assert ( - False - ), f"Cannot proceed with DNS mutations because system memory is at {mem_percent:.1f}% ({free_memory_human} remaining)" - - query = domain - domain_hash = hash(domain) - if self.scan.stopping: - return - - mutations = set(base_mutations) - - def add_mutation(_domain_hash, m): - h = hash((_domain_hash, m)) - if h not in self.mutations_tried: - self.mutations_tried.add(h) - mutations.add(m) - - # try every subdomain everywhere else - for _domain, _subdomains in found: - if _domain == domain: - continue - for s in _subdomains: - first_segment = s.split(".")[0] - # skip stuff with lots of numbers (e.g. PTRs) - digits = self.digit_regex.findall(first_segment) - excessive_digits = len(digits) > 2 - long_digits = any(len(d) > 3 for d in digits) - if excessive_digits or long_digits: - continue - add_mutation(domain_hash, first_segment) - for word in self.helpers.extract_words( - first_segment, word_regexes=self.helpers.word_cloud.dns_mutator.extract_word_regexes - ): - add_mutation(domain_hash, word) - - # numbers + devops mutations - for mutation in self.helpers.word_cloud.mutations( - subdomains, cloud=False, numbers=3, number_padding=1 - ): - for delimiter in ("", ".", "-"): - m = delimiter.join(mutation).lower() - add_mutation(domain_hash, m) - - # special dns mutator - for subdomain in self.helpers.word_cloud.dns_mutator.mutations( - subdomains, max_mutations=self.max_mutations - ): - add_mutation(domain_hash, subdomain) - - if mutations: - self.info(f"Trying {len(mutations):,} mutations against {domain} ({i+1}/{len(found)})") - results = list(await self.massdns(query, mutations)) - for hostname in results: - source_event = self.source_events.get(hostname) - if source_event is None: - self.warning(f"Could not correlate source event from: {hostname}") - source_event = self.scan.root_event - await self.emit_result( - hostname, source_event, query, tags=[f"mutation-{self._mutation_run}"] - ) - if results: - found_mutations = True - continue - break - except AssertionError as e: - self.warning(e) - - if found_mutations: - self._mutation_run += 1 - - def add_found(self, host): - if not isinstance(host, str): - host = host.data - if self.helpers.is_subdomain(host): - subdomain, domain = host.split(".", 1) - is_ptr = self.helpers.is_ptr(subdomain) - in_scope = self.scan.in_scope(domain) - if in_scope and not is_ptr: - try: - self.found[domain].add(subdomain) - except KeyError: - self.found[domain] = set((subdomain,)) - - async def gen_subdomains(self, prefixes, domain): - for p in prefixes: - d = f"{p}.{domain}" - yield d - - def gen_random_subdomains(self, n=50): - delimiters = (".", "-") - lengths = list(range(3, 8)) - for i in range(0, max(0, n - 5)): - d = delimiters[i % len(delimiters)] - l = lengths[i % len(lengths)] - segments = list(random.choice(self.devops_mutations) for _ in range(l)) - segments.append(self.helpers.rand_string(length=8, digits=False)) - subdomain = d.join(segments) - yield subdomain - for _ in range(5): - yield self.helpers.rand_string(length=8, digits=False) - - def has_excessive_digits(self, d): - """ - Identifies dns names with excessive numbers, e.g.: - - w1-2-3.evilcorp.com - - ptr1234.evilcorp.com - """ - digits = self.digit_regex.findall(d) - excessive_digits = len(digits) > 2 - long_digits = any(len(d) > 3 for d in digits) - if excessive_digits or long_digits: - return True - return False diff --git a/bbot/modules/templates/subdomain_enum.py b/bbot/modules/templates/subdomain_enum.py index 790b35515b..3c65dfa34b 100644 --- a/bbot/modules/templates/subdomain_enum.py +++ b/bbot/modules/templates/subdomain_enum.py @@ -16,16 +16,39 @@ class subdomain_enum(BaseModule): # set module error state after this many failed requests in a row abort_after_failures = 5 + # whether to reject wildcard DNS_NAMEs reject_wildcards = "strict" - # this helps combat rate limiting by ensuring that a query doesn't execute + + # set qsize to 1. this helps combat rate limiting by ensuring that a query doesn't execute # until the queue is ready to receive its results _qsize = 1 + # how to deduplicate incoming events + # options: + # "root_domain": if a dns name has already been tried, don't try any of its children + # "parent_domain": always try a domain unless its direct parent has already been tried + dedup_strategy = "root_domain" + async def setup(self): - self.processed = set() + strict_scope = self.dedup_strategy == "parent_domain" + self.processed = self.helpers.make_target(strict_scope=strict_scope) return True + async def filter_event(self, event): + """ + This filter_event is used across many modules + """ + query = self.make_query(event) + # reject if already processed + if query in self.processed: + return False, "Event was already processed" + eligible, reason = await self.eligible_for_enumeration(event) + if eligible: + self.processed.add(query) + return True, reason + return False, reason + async def handle_event(self, event): query = self.make_query(event) results = await self.query(query) @@ -91,20 +114,6 @@ async def _is_wildcard(self, query): return True return False - async def filter_event(self, event): - """ - This filter_event is used across many modules - """ - query = self.make_query(event) - # reject if already processed - if self.already_processed(query): - return False, "Event was already processed" - eligible, reason = await self.eligible_for_enumeration(event) - if eligible: - self.processed.add(hash(query)) - return True, reason - return False, reason - async def eligible_for_enumeration(self, event): query = self.make_query(event) # check if wildcard @@ -128,12 +137,6 @@ async def eligible_for_enumeration(self, event): return False, "Event is both a cloud resource and a wildcard domain" return True, "" - def already_processed(self, hostname): - for parent in self.helpers.domain_parents(hostname, include_self=True): - if hash(parent) in self.processed: - return True - return False - async def abort_if(self, event): # this helps weed out unwanted results when scanning IP_RANGES and wildcard domains if "in-scope" not in event.tags: diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index 57eb11a1a1..9055d03ba8 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -303,9 +303,9 @@ def merge(self, other): self.explicit_output_modules.update(other.explicit_output_modules) self.flags.update(other.flags) # scope - self.target.add_target(other.target) - self.whitelist.add_target(other.whitelist) - self.blacklist.add_target(other.blacklist) + self.target.add(other.target) + self.whitelist.add(other.whitelist) + self.blacklist.add(other.blacklist) self.strict_scope = self.strict_scope or other.strict_scope for t in (self.target, self.whitelist): t.strict_scope = self.strict_scope diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index b19d1b6a61..878e80846f 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -93,11 +93,11 @@ def __init__(self, *targets, strict_scope=False): if len(targets) > 0: log.verbose(f"Creating events from {len(targets):,} targets") for t in targets: - self.add_target(t) + self.add(t) self._hash = None - def add_target(self, t, event_type=None): + def add(self, t, event_type=None): """ Add a target or merge events from another Target object into this Target. @@ -108,7 +108,7 @@ def add_target(self, t, event_type=None): _events (dict): The dictionary is updated to include the new target's events. Examples: - >>> target.add_target('example.com') + >>> target.add('example.com') Notes: - If `t` is of the same class as this Target, all its events are merged. diff --git a/bbot/test/test_step_2/module_tests/test_module_dastardly.py b/bbot/test/test_step_2/module_tests/test_module_dastardly.py index ed4c20e5cb..fe9de5d6c4 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dastardly.py +++ b/bbot/test/test_step_2/module_tests/test_module_dastardly.py @@ -44,7 +44,7 @@ async def setup_after_prep(self, module_test): # get docker IP docker_ip = await self.get_docker_ip(module_test) - module_test.scan.target.add_target(docker_ip) + module_test.scan.target.add(docker_ip) # replace 127.0.0.1 with docker host IP to allow dastardly access to local http server old_filter_event = module_test.module.filter_event diff --git a/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py b/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py index 5850fbd495..3d3d670e1e 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py +++ b/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py @@ -2,22 +2,47 @@ class TestDNSCommonSRV(ModuleTestBase): - targets = ["blacklanternsecurity.notreal"] config_overrides = {"dns_resolution": True} async def setup_after_prep(self, module_test): + + old_run_live = module_test.scan.helpers.run_live + + async def new_run_live(*command, check=False, text=True, **kwargs): + if "massdns" in command[:2]: + yield """{"name":"_ldap._tcp.gc._msdcs.blacklanternsecurity.com.","type":"SRV","class":"IN","status":"NOERROR","rx_ts":1713974911725326170,"data":{"answers":[{"ttl":86400,"type":"SRV","class":"IN","name":"_ldap._tcp.gc._msdcs.blacklanternsecurity.com.","data":"10 10 1720 asdf.blacklanternsecurity.com."},{"ttl":86400,"type":"SRV","class":"IN","name":"_ldap._tcp.gc._msdcs.blacklanternsecurity.com.","data":"10 10 1720 asdf.blacklanternsecurity.com."}]},"flags":["rd","ra"],"resolver":"195.226.187.130:53","proto":"UDP"}""" + else: + async for _ in old_run_live(*command, check=False, text=True, **kwargs): + yield _ + + module_test.monkeypatch.setattr(module_test.scan.helpers, "run_live", new_run_live) + await module_test.mock_dns( { - "_ldap._tcp.gc._msdcs.blacklanternsecurity.notreal": { - "SRV": ["0 100 3268 asdf.blacklanternsecurity.notreal"] - }, - "asdf.blacklanternsecurity.notreal": {"A": "1.2.3.4"}, + "blacklanternsecurity.com": {"A": ["1.2.3.4"]}, + "_ldap._tcp.gc._msdcs.blacklanternsecurity.com": {"SRV": ["0 100 3268 asdf.blacklanternsecurity.com"]}, + "asdf.blacklanternsecurity.com": {"A": ["1.2.3.5"]}, } ) def check(self, module_test, events): - assert any( - e.data == "_ldap._tcp.gc._msdcs.blacklanternsecurity.notreal" for e in events - ), "Failed to detect subdomain" - assert any(e.data == "asdf.blacklanternsecurity.notreal" for e in events), "Failed to detect subdomain" - assert not any(e.data == "_ldap._tcp.dc._msdcs.blacklanternsecurity.notreal" for e in events), "False positive" + assert len(events) == 4 + assert 1 == len([e for e in events if e.type == "DNS_NAME" and e.data == "blacklanternsecurity.com"]) + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" + and e.data == "_ldap._tcp.gc._msdcs.blacklanternsecurity.com" + and str(e.module) == "dnscommonsrv" + ] + ), "Failed to detect subdomain 1" + assert 1 == len( + [ + e + for e in events + if e.type == "DNS_NAME" + and e.data == "asdf.blacklanternsecurity.com" + and str(e.module) != "dnscommonsrv" + ] + ), "Failed to detect subdomain 2" diff --git a/bbot/test/test_step_2/module_tests/test_module_massdns.py b/bbot/test/test_step_2/module_tests/test_module_massdns.py deleted file mode 100644 index 1b45437888..0000000000 --- a/bbot/test/test_step_2/module_tests/test_module_massdns.py +++ /dev/null @@ -1,10 +0,0 @@ -from .base import ModuleTestBase, tempwordlist - - -class TestMassdns(ModuleTestBase): - subdomain_wordlist = tempwordlist(["www", "asdf"]) - config_overrides = {"modules": {"massdns": {"wordlist": str(subdomain_wordlist)}}} - - def check(self, module_test, events): - assert any(e.data == "www.blacklanternsecurity.com" for e in events) - assert not any(e.data == "asdf.blacklanternsecurity.com" for e in events) From f5ad756f269eae60c8f0b413d9abafbbda3561a2 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 24 Apr 2024 13:42:49 -0400 Subject: [PATCH 08/24] wip dnsbrute rework --- bbot/core/helpers/dns/brute.py | 180 ++++++++++++++++++ bbot/modules/dnsbrute.py | 50 +++++ .../module_tests/test_module_dnsbrute.py | 78 ++++++++ 3 files changed, 308 insertions(+) create mode 100644 bbot/core/helpers/dns/brute.py create mode 100644 bbot/modules/dnsbrute.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_dnsbrute.py diff --git a/bbot/core/helpers/dns/brute.py b/bbot/core/helpers/dns/brute.py new file mode 100644 index 0000000000..5bfa7b2e08 --- /dev/null +++ b/bbot/core/helpers/dns/brute.py @@ -0,0 +1,180 @@ +import json +import random +import asyncio +import logging +import subprocess + + +class DNSBrute: + """ + Helper for DNS brute-forcing. + + Examples: + >>> domain = "evilcorp.com" + >>> subdomains = ["www", "mail"] + >>> results = await self.helpers.dns.brute(self, domain, subdomains) + """ + + nameservers_url = ( + "https://raw.githubusercontent.com/blacklanternsecurity/public-dns-servers/master/nameservers.txt" + ) + + def __init__(self, parent_helper): + self.parent_helper = parent_helper + self.log = logging.getLogger("bbot.helper.dns.brute") + self.num_canaries = 100 + self.max_resolvers = 1000 + self.devops_mutations = list(self.parent_helper.word_cloud.devops_mutations) + self.digit_regex = self.parent_helper.re.compile(r"\d+") + self._resolver_file = None + self._dnsbrute_lock = asyncio.Lock() + + async def __call__(self, *args, **kwargs): + return await self.dnsbrute(*args, **kwargs) + + async def dnsbrute(self, module, domain, subdomains, type=None): + subdomains = list(subdomains) + + if type is None: + type = "A" + type = str(type).strip().upper() + + domain_wildcard_rdtypes = set() + for _domain, rdtypes in (await self.parent_helper.dns.is_wildcard_domain(domain)).items(): + for rdtype, results in rdtypes.items(): + if results: + domain_wildcard_rdtypes.add(rdtype) + if any([r in domain_wildcard_rdtypes for r in (type, "CNAME")]): + self.log.info( + f"Aborting massdns on {domain} because it's a wildcard domain ({','.join(domain_wildcard_rdtypes)})" + ) + return [] + else: + self.log.trace(f"{domain}: A is not in domain_wildcard_rdtypes:{domain_wildcard_rdtypes}") + + canaries = self.gen_random_subdomains(self.num_canaries) + canaries_list = list(canaries) + canaries_pre = canaries_list[: int(self.num_canaries / 2)] + canaries_post = canaries_list[int(self.num_canaries / 2) :] + # sandwich subdomains between canaries + subdomains = canaries_pre + subdomains + canaries_post + + results = [] + canaries_triggered = [] + async for hostname, ip, rdtype in self._massdns(module, domain, subdomains, rdtype=type): + sub = hostname.split(domain)[0] + if sub in canaries: + canaries_triggered.append(sub) + else: + results.append(hostname) + + if canaries_triggered > 5: + self.log.info( + f"Aborting massdns on {domain} due to false positive: ({len(canaries_triggered):,} canaries triggered - {','.join(canaries_triggered)})" + ) + return [] + + # everything checks out + return results + + async def _massdns(self, module, domain, subdomains, rdtype): + """ + { + "name": "www.blacklanternsecurity.com.", + "type": "A", + "class": "IN", + "status": "NOERROR", + "data": { + "answers": [ + { + "ttl": 3600, + "type": "CNAME", + "class": "IN", + "name": "www.blacklanternsecurity.com.", + "data": "blacklanternsecurity.github.io." + }, + { + "ttl": 3600, + "type": "A", + "class": "IN", + "name": "blacklanternsecurity.github.io.", + "data": "185.199.108.153" + } + ] + }, + "resolver": "168.215.165.186:53" + } + """ + resolver_file = await self.resolver_file() + command = ( + "massdns", + "-r", + resolver_file, + "-s", + self.max_resolvers, + "-t", + rdtype, + "-o", + "J", + "-q", + ) + subdomains = self.gen_subdomains(subdomains, domain) + hosts_yielded = set() + async with self._dnsbrute_lock: + async for line in module.run_process_live(*command, stderr=subprocess.DEVNULL, input=subdomains): + self.log.critical(line) + try: + j = json.loads(line) + except json.decoder.JSONDecodeError: + self.log.debug(f"Failed to decode line: {line}") + continue + answers = j.get("data", {}).get("answers", []) + if type(answers) == list and len(answers) > 0: + answer = answers[0] + hostname = answer.get("name", "").strip(".").lower() + if hostname.endswith(f".{domain}"): + data = answer.get("data", "") + rdtype = answer.get("type", "").upper() + if data and rdtype: + hostname_hash = hash(hostname) + if hostname_hash not in hosts_yielded: + hosts_yielded.add(hostname_hash) + yield hostname, data, rdtype + + async def gen_subdomains(self, prefixes, domain): + for p in prefixes: + d = f"{p}.{domain}" + yield d + + async def resolver_file(self): + if self._resolver_file is None: + self._resolver_file = await self.parent_helper.wordlist( + self.nameservers_url, + cache_hrs=24 * 7, + ) + return self._resolver_file + + def gen_random_subdomains(self, n=50): + delimiters = (".", "-") + lengths = list(range(3, 8)) + for i in range(0, max(0, n - 5)): + d = delimiters[i % len(delimiters)] + l = lengths[i % len(lengths)] + segments = list(random.choice(self.devops_mutations) for _ in range(l)) + segments.append(self.parent_helper.rand_string(length=8, digits=False)) + subdomain = d.join(segments) + yield subdomain + for _ in range(5): + yield self.parent_helper.rand_string(length=8, digits=False) + + def has_excessive_digits(self, d): + """ + Identifies dns names with excessive numbers, e.g.: + - w1-2-3.evilcorp.com + - ptr1234.evilcorp.com + """ + is_ptr = self.parent_helper.is_ptr(d) + digits = self.digit_regex.findall(d) + excessive_digits = len(digits) > 2 + long_digits = any(len(d) > 3 for d in digits) + return is_ptr or excessive_digits or long_digits diff --git a/bbot/modules/dnsbrute.py b/bbot/modules/dnsbrute.py new file mode 100644 index 0000000000..2df3a48d60 --- /dev/null +++ b/bbot/modules/dnsbrute.py @@ -0,0 +1,50 @@ +from bbot.modules.templates.subdomain_enum import subdomain_enum + + +class dnsbrute(subdomain_enum): + flags = ["subdomain-enum", "passive", "aggressive"] + watched_events = ["DNS_NAME"] + produced_events = ["DNS_NAME"] + meta = {"description": "Brute-force subdomains with massdns + static wordlist"} + options = { + "wordlist": "https://raw.githubusercontent.com/danielmiessler/SecLists/master/Discovery/DNS/subdomains-top1million-5000.txt", + "max_depth": 5, + } + options_desc = { + "wordlist": "Subdomain wordlist URL", + "max_depth": "How many subdomains deep to brute force, i.e. 5.4.3.2.1.evilcorp.com", + } + deps_common = ["massdns"] + reject_wildcards = "strict" + dedup_strategy = "parent_domain" + _qsize = 10000 + + async def setup(self): + self.max_depth = max(1, self.config.get("max_depth", 5)) + self.subdomain_file = await self.helpers.wordlist(self.config.get("wordlist")) + self.subdomain_list = set(self.helpers.read_file(self.subdomain_file)) + return await super().setup() + + async def eligible_for_enumeration(self, event): + eligible, reason = await super().eligible_for_enumeration(event) + query = self.make_query(event) + + # limit brute force depth + subdomain_depth = self.helpers.subdomain_depth(query) + 1 + if subdomain_depth > self.max_depth: + eligible = False + reason = f"subdomain depth of *.{query} ({subdomain_depth}) > max_depth ({self.max_depth})" + + # don't brute-force things that look like autogenerated PTRs + if self.helpers.dns.brute.has_excessive_digits(query): + eligible = False + reason = f'"{query}" looks like an autogenerated PTR' + + return eligible, reason + + async def handle_event(self, event): + self.hugewarning(event) + query = self.make_query(event) + self.info(f"Brute-forcing subdomains for {query} (source: {event.data})") + for hostname in await self.helpers.dns.brute(self, query, self.subdomain_list): + await self.emit_event(hostname, "DNS_NAME", source=event) diff --git a/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py b/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py new file mode 100644 index 0000000000..664539bb49 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py @@ -0,0 +1,78 @@ +from .base import ModuleTestBase, tempwordlist + + +class TestDnsbrute(ModuleTestBase): + subdomain_wordlist = tempwordlist(["www", "asdf"]) + config_overrides = {"modules": {"massdns": {"wordlist": str(subdomain_wordlist), "max_depth": 3}}} + + async def setup_after_prep(self, module_test): + + old_run_live = module_test.scan.helpers.run_live + + async def new_run_live(*command, check=False, text=True, **kwargs): + if "massdns" in command[:2]: + yield """{"name": "www-test.blacklanternsecurity.com.", "type": "A", "class": "IN", "status": "NOERROR", "rx_ts": 1713974911725326170, "data": {"answers": [{"ttl": 86400, "type": "A", "class": "IN", "name": "www-test.blacklanternsecurity.com.", "data": "1.2.3.4."}]}, "flags": ["rd", "ra"], "resolver": "195.226.187.130:53", "proto": "UDP"}""" + else: + async for _ in old_run_live(*command, check=False, text=True, **kwargs): + yield _ + + module_test.monkeypatch.setattr(module_test.scan.helpers, "run_live", new_run_live) + + await module_test.mock_dns( + { + "www-test.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, + } + ) + + # test recursive brute-force event filtering + event = module_test.scan.make_event("evilcorp.com", "DNS_NAME", source=module_test.scan.root_event) + event.scope_distance = 0 + result, reason = await module_test.module.filter_event(event) + assert result == True + assert "evilcorp.com" in module_test.module.processed + assert not "com" in module_test.module.processed + event = module_test.scan.make_event("evilcorp.com", "DNS_NAME", source=module_test.scan.root_event) + event.scope_distance = 0 + result, reason = await module_test.module.filter_event(event) + assert result == False + assert reason == "Event was already processed" + event = module_test.scan.make_event("www.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event) + event.scope_distance = 0 + result, reason = await module_test.module.filter_event(event) + assert result == False + assert reason == "Event was already processed" + event = module_test.scan.make_event("test.www.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event) + event.scope_distance = 0 + result, reason = await module_test.module.filter_event(event) + assert result == True + event = module_test.scan.make_event("test.www.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event) + event.scope_distance = 0 + result, reason = await module_test.module.filter_event(event) + assert result == False + assert reason == "Event was already processed" + event = module_test.scan.make_event( + "asdf.test.www.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event + ) + event.scope_distance = 0 + result, reason = await module_test.module.filter_event(event) + assert result == True + event = module_test.scan.make_event( + "wat.asdf.test.www.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event + ) + event.scope_distance = 0 + result, reason = await module_test.module.filter_event(event) + assert result == False + assert reason == f"subdomain depth of *.asdf.test.www.evilcorp.com (4) > max_depth (3)" + event = module_test.scan.make_event( + "hmmm.ptr1234.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event + ) + event.scope_distance = 0 + result, reason = await module_test.module.filter_event(event) + assert result == False + assert reason == f'"ptr1234.evilcorp.com" looks like an autogenerated PTR' + + def check(self, module_test, events): + assert len(events) == 3 + assert 1 == len( + [e for e in events if e.data == "www-test.blacklanternsecurity.com" and str(e.module) == "massdns"] + ) From 8bfb557505b030178d50279e7e3a9b9153fe77c5 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 24 Apr 2024 14:19:04 -0400 Subject: [PATCH 09/24] rename massdns --> dnsbrute --- bbot/test/test_step_1/test_cli.py | 32 +++++++++---------- bbot/test/test_step_1/test_modules_basic.py | 14 ++++---- bbot/test/test_step_1/test_presets.py | 30 ++++++++--------- .../module_tests/test_module_dnsbrute.py | 4 +-- docs/comparison.md | 2 +- docs/scanning/presets.md | 3 -- 6 files changed, 41 insertions(+), 44 deletions(-) diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index 0a34b4faa5..32e761679f 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -133,7 +133,7 @@ async def test_cli_args(monkeypatch, caplog, clean_default_config): assert "| bool" in caplog.text assert "| emit URLs in addition to DNS_NAMEs" in caplog.text assert "| False" in caplog.text - assert "| modules.massdns.wordlist" in caplog.text + assert "| modules.dnsbrute.wordlist" in caplog.text assert "| modules.robots.include_allow" in caplog.text # list module options by flag @@ -146,17 +146,17 @@ async def test_cli_args(monkeypatch, caplog, clean_default_config): assert "| bool" in caplog.text assert "| emit URLs in addition to DNS_NAMEs" in caplog.text assert "| False" in caplog.text - assert "| modules.massdns.wordlist" in caplog.text + assert "| modules.dnsbrute.wordlist" in caplog.text assert not "| modules.robots.include_allow" in caplog.text # list module options by module caplog.clear() assert not caplog.text - monkeypatch.setattr("sys.argv", ["bbot", "-m", "massdns", "-lmo"]) + monkeypatch.setattr("sys.argv", ["bbot", "-m", "dnsbrute", "-lmo"]) result = await cli._main() assert result == None assert not "| modules.wayback.urls" in caplog.text - assert "| modules.massdns.wordlist" in caplog.text + assert "| modules.dnsbrute.wordlist" in caplog.text assert not "| modules.robots.include_allow" in caplog.text # list flags @@ -219,7 +219,7 @@ async def test_cli_args(monkeypatch, caplog, clean_default_config): monkeypatch.setattr("sys.argv", ["bbot", "-l"]) result = await cli._main() assert result == None - assert "| massdns" in caplog.text + assert "| dnsbrute" in caplog.text assert "| httpx" in caplog.text assert "| robots" in caplog.text @@ -229,7 +229,7 @@ async def test_cli_args(monkeypatch, caplog, clean_default_config): monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-l"]) result = await cli._main() assert result == None - assert "| massdns" in caplog.text + assert "| dnsbrute" in caplog.text assert "| httpx" in caplog.text assert not "| robots" in caplog.text @@ -238,7 +238,7 @@ async def test_cli_args(monkeypatch, caplog, clean_default_config): monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-rf", "passive", "-l"]) result = await cli._main() assert result == None - assert "| massdns" in caplog.text + assert "| dnsbrute" in caplog.text assert not "| httpx" in caplog.text # list modules by flag + excluded flag @@ -247,16 +247,16 @@ async def test_cli_args(monkeypatch, caplog, clean_default_config): monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-ef", "active", "-l"]) result = await cli._main() assert result == None - assert "| massdns" in caplog.text + assert "| dnsbrute" in caplog.text assert not "| httpx" in caplog.text # list modules by flag + excluded module caplog.clear() assert not caplog.text - monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-em", "massdns", "-l"]) + monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-em", "dnsbrute", "-l"]) result = await cli._main() assert result == None - assert not "| massdns" in caplog.text + assert not "| dnsbrute" in caplog.text assert "| httpx" in caplog.text # unconsoleable output module @@ -343,18 +343,18 @@ def test_cli_module_validation(monkeypatch, caplog): # incorrect module caplog.clear() assert not caplog.text - monkeypatch.setattr("sys.argv", ["bbot", "-m", "massdnss"]) + monkeypatch.setattr("sys.argv", ["bbot", "-m", "dnsbrutes"]) cli.main() - assert 'Could not find scan module "massdnss"' in caplog.text - assert 'Did you mean "massdns"?' in caplog.text + assert 'Could not find scan module "dnsbrutes"' in caplog.text + assert 'Did you mean "dnsbrute"?' in caplog.text # incorrect excluded module caplog.clear() assert not caplog.text - monkeypatch.setattr("sys.argv", ["bbot", "-em", "massdnss"]) + monkeypatch.setattr("sys.argv", ["bbot", "-em", "dnsbrutes"]) cli.main() - assert 'Could not find module "massdnss"' in caplog.text - assert 'Did you mean "massdns"?' in caplog.text + assert 'Could not find module "dnsbrutes"' in caplog.text + assert 'Did you mean "dnsbrute"?' in caplog.text # incorrect output module caplog.clear() diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index 03273c0a70..08fd16eec8 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -100,15 +100,15 @@ async def test_modules_basic(scan, helpers, events, bbot_scanner, httpx_mock): # module preloading all_preloaded = DEFAULT_PRESET.module_loader.preloaded() - assert "massdns" in all_preloaded - assert "DNS_NAME" in all_preloaded["massdns"]["watched_events"] - assert "DNS_NAME" in all_preloaded["massdns"]["produced_events"] - assert "subdomain-enum" in all_preloaded["massdns"]["flags"] - assert "wordlist" in all_preloaded["massdns"]["config"] - assert type(all_preloaded["massdns"]["config"]["max_resolvers"]) == int + assert "dnsbrute" in all_preloaded + assert "DNS_NAME" in all_preloaded["dnsbrute"]["watched_events"] + assert "DNS_NAME" in all_preloaded["dnsbrute"]["produced_events"] + assert "subdomain-enum" in all_preloaded["dnsbrute"]["flags"] + assert "wordlist" in all_preloaded["dnsbrute"]["config"] + assert type(all_preloaded["dnsbrute"]["config"]["max_depth"]) == int assert all_preloaded["sslcert"]["deps"]["pip"] assert all_preloaded["sslcert"]["deps"]["apt"] - assert all_preloaded["massdns"]["deps"]["common"] + assert all_preloaded["dnsbrute"]["deps"]["common"] assert all_preloaded["gowitness"]["deps"]["ansible"] all_flags = set() diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index d84244e4fe..6f0b9773f6 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -584,13 +584,13 @@ def get_module_flags(p): preset = Preset(flags=["subdomain-enum"]).bake() assert len(preset.modules) > 25 module_flags = list(get_module_flags(preset)) - massdns_flags = preset.preloaded_module("massdns").get("flags", []) - assert "subdomain-enum" in massdns_flags - assert "passive" in massdns_flags - assert not "active" in massdns_flags - assert "aggressive" in massdns_flags - assert not "safe" in massdns_flags - assert "massdns" in [x[0] for x in module_flags] + dnsbrute_flags = preset.preloaded_module("dnsbrute").get("flags", []) + assert "subdomain-enum" in dnsbrute_flags + assert "passive" in dnsbrute_flags + assert not "active" in dnsbrute_flags + assert "aggressive" in dnsbrute_flags + assert not "safe" in dnsbrute_flags + assert "dnsbrute" in [x[0] for x in module_flags] assert "certspotter" in [x[0] for x in module_flags] assert "c99" in [x[0] for x in module_flags] assert any("passive" in flags for module, flags in module_flags) @@ -602,7 +602,7 @@ def get_module_flags(p): preset = Preset(flags=["subdomain-enum"], require_flags=["passive"]).bake() assert len(preset.modules) > 25 module_flags = list(get_module_flags(preset)) - assert "massdns" in [x[0] for x in module_flags] + assert "dnsbrute" in [x[0] for x in module_flags] assert all("passive" in flags for module, flags in module_flags) assert not any("active" in flags for module, flags in module_flags) assert any("safe" in flags for module, flags in module_flags) @@ -612,17 +612,17 @@ def get_module_flags(p): preset = Preset(flags=["subdomain-enum"], exclude_flags=["active"]).bake() assert len(preset.modules) > 25 module_flags = list(get_module_flags(preset)) - assert "massdns" in [x[0] for x in module_flags] + assert "dnsbrute" in [x[0] for x in module_flags] assert all("passive" in flags for module, flags in module_flags) assert not any("active" in flags for module, flags in module_flags) assert any("safe" in flags for module, flags in module_flags) assert any("aggressive" in flags for module, flags in module_flags) # enable by flag, one excluded module - preset = Preset(flags=["subdomain-enum"], exclude_modules=["massdns"]).bake() + preset = Preset(flags=["subdomain-enum"], exclude_modules=["dnsbrute"]).bake() assert len(preset.modules) > 25 module_flags = list(get_module_flags(preset)) - assert not "massdns" in [x[0] for x in module_flags] + assert not "dnsbrute" in [x[0] for x in module_flags] assert any("passive" in flags for module, flags in module_flags) assert any("active" in flags for module, flags in module_flags) assert any("safe" in flags for module, flags in module_flags) @@ -632,7 +632,7 @@ def get_module_flags(p): preset = Preset(flags=["subdomain-enum"], require_flags=["safe", "passive"]).bake() assert len(preset.modules) > 25 module_flags = list(get_module_flags(preset)) - assert not "massdns" in [x[0] for x in module_flags] + assert not "dnsbrute" in [x[0] for x in module_flags] assert all("passive" in flags and "safe" in flags for module, flags in module_flags) assert all("active" not in flags and "aggressive" not in flags for module, flags in module_flags) assert not any("active" in flags for module, flags in module_flags) @@ -642,17 +642,17 @@ def get_module_flags(p): preset = Preset(flags=["subdomain-enum"], exclude_flags=["aggressive", "active"]).bake() assert len(preset.modules) > 25 module_flags = list(get_module_flags(preset)) - assert not "massdns" in [x[0] for x in module_flags] + assert not "dnsbrute" in [x[0] for x in module_flags] assert all("passive" in flags and "safe" in flags for module, flags in module_flags) assert all("active" not in flags and "aggressive" not in flags for module, flags in module_flags) assert not any("active" in flags for module, flags in module_flags) assert not any("aggressive" in flags for module, flags in module_flags) # enable by flag, multiple excluded modules - preset = Preset(flags=["subdomain-enum"], exclude_modules=["massdns", "c99"]).bake() + preset = Preset(flags=["subdomain-enum"], exclude_modules=["dnsbrute", "c99"]).bake() assert len(preset.modules) > 25 module_flags = list(get_module_flags(preset)) - assert not "massdns" in [x[0] for x in module_flags] + assert not "dnsbrute" in [x[0] for x in module_flags] assert "certspotter" in [x[0] for x in module_flags] assert not "c99" in [x[0] for x in module_flags] assert any("passive" in flags for module, flags in module_flags) diff --git a/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py b/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py index 664539bb49..2d301da94d 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py +++ b/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py @@ -3,7 +3,7 @@ class TestDnsbrute(ModuleTestBase): subdomain_wordlist = tempwordlist(["www", "asdf"]) - config_overrides = {"modules": {"massdns": {"wordlist": str(subdomain_wordlist), "max_depth": 3}}} + config_overrides = {"modules": {"dnsbrute": {"wordlist": str(subdomain_wordlist), "max_depth": 3}}} async def setup_after_prep(self, module_test): @@ -74,5 +74,5 @@ async def new_run_live(*command, check=False, text=True, **kwargs): def check(self, module_test, events): assert len(events) == 3 assert 1 == len( - [e for e in events if e.data == "www-test.blacklanternsecurity.com" and str(e.module) == "massdns"] + [e for e in events if e.data == "www-test.blacklanternsecurity.com" and str(e.module) == "dnsbrute"] ) diff --git a/docs/comparison.md b/docs/comparison.md index 3226036f19..183e84319d 100644 --- a/docs/comparison.md +++ b/docs/comparison.md @@ -2,7 +2,7 @@ BBOT does a lot more than just subdomain enumeration. However, subdomain enumeration is arguably the most important part of OSINT, and since there's so many subdomain enumeration tools out there, they're the easiest class of tool to compare it to. -Thanks to BBOT's recursive nature (and its `massdns` module with its NLP-powered subdomain mutations), it typically finds about 20-25% more than other tools such as `Amass` or `theHarvester`. This holds true even for larger targets like `delta.com` (1000+ subdomains): +Thanks to BBOT's recursive nature (and its `dnsbrute_mutations` module with its NLP-powered subdomain mutations), it typically finds about 20-25% more than other tools such as `Amass` or `theHarvester`. This holds true especially for larger targets like `delta.com` (1000+ subdomains): ### Subdomains Found diff --git a/docs/scanning/presets.md b/docs/scanning/presets.md index f19e27550b..3d8f47a9b8 100644 --- a/docs/scanning/presets.md +++ b/docs/scanning/presets.md @@ -86,9 +86,6 @@ config: api_key: 21a270d5f59c9b05813a72bb41707266 virustotal: api_key: 4f41243847da693a4f356c0486114bc6 - # other module config options - massdns: - max_resolvers: 5000 ``` To execute your custom preset, you do: From 7e6a8edc4a889a2307b596f641776b7924e7be6b Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 24 Apr 2024 14:20:32 -0400 Subject: [PATCH 10/24] fix tests --- bbot/core/helpers/dns/brute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/core/helpers/dns/brute.py b/bbot/core/helpers/dns/brute.py index 5bfa7b2e08..da0ca09312 100644 --- a/bbot/core/helpers/dns/brute.py +++ b/bbot/core/helpers/dns/brute.py @@ -68,7 +68,7 @@ async def dnsbrute(self, module, domain, subdomains, type=None): else: results.append(hostname) - if canaries_triggered > 5: + if len(canaries_triggered) > 5: self.log.info( f"Aborting massdns on {domain} due to false positive: ({len(canaries_triggered):,} canaries triggered - {','.join(canaries_triggered)})" ) From 2aea9b109464111dad7b89014d88a6d3ca2cd733 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 24 Apr 2024 16:54:13 -0400 Subject: [PATCH 11/24] add bloom filter --- bbot/core/helpers/bloom.py | 45 ++++++++++++++ bbot/core/helpers/helper.py | 5 ++ bbot/test/test_step_1/test_bloom_filter.py | 71 ++++++++++++++++++++++ 3 files changed, 121 insertions(+) create mode 100644 bbot/core/helpers/bloom.py create mode 100644 bbot/test/test_step_1/test_bloom_filter.py diff --git a/bbot/core/helpers/bloom.py b/bbot/core/helpers/bloom.py new file mode 100644 index 0000000000..1f454ce0c5 --- /dev/null +++ b/bbot/core/helpers/bloom.py @@ -0,0 +1,45 @@ +import mmh3 +from bitarray import bitarray + + +class BloomFilter: + """ + Simple bloom filter implementation capable of rougly 200K lookups/s. + + BBOT uses bloom filters in scenarios like dns brute-forcing, where it's useful to keep track + of which mutations have been tried so far. + + A 100-megabyte bloom filter (800M bits) can store 10M entries with a .01% false-positive rate. + A python hash is 36 bytes. So if you wanted to store these in a set, this would take up + 36 * 10M * 2 (key+value) == 720 megabytes. So we save rougly 7 times the space. + """ + + def __init__(self, size=2**16): + self.size = size + self.bit_array = bitarray(size) + self.bit_array.setall(0) # Initialize all bits to 0 + + def _hashes(self, item): + item_str = str(item).encode("utf-8") + return [ + abs(hash(item)) % self.size, + abs(mmh3.hash(item_str)) % self.size, + abs(self._fnv1a_hash(item_str)) % self.size, + ] + + def _fnv1a_hash(self, data): + hash = 0x811C9DC5 # 2166136261 + for byte in data: + hash ^= byte + hash = (hash * 0x01000193) % 2**32 # 16777619 + return hash + + def add(self, item): + for hash_value in self._hashes(item): + self.bit_array[hash_value] = 1 + + def check(self, item): + return all(self.bit_array[hash_value] for hash_value in self._hashes(item)) + + def __contains__(self, item): + return self.check(item) diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py index e4dc09326b..56b9c3bbd2 100644 --- a/bbot/core/helpers/helper.py +++ b/bbot/core/helpers/helper.py @@ -91,6 +91,11 @@ def __init__(self, preset): self.word_cloud = WordCloud(self) self.dummy_modules = {} + def bloom_filter(self, size): + from .bloom import BloomFilter + + return BloomFilter(size) + def interactsh(self, *args, **kwargs): return Interactsh(self, *args, **kwargs) diff --git a/bbot/test/test_step_1/test_bloom_filter.py b/bbot/test/test_step_1/test_bloom_filter.py new file mode 100644 index 0000000000..089f4af82f --- /dev/null +++ b/bbot/test/test_step_1/test_bloom_filter.py @@ -0,0 +1,71 @@ +import sys +import time +import string +import random + + +def test_bloom_filter(): + + def generate_random_strings(n, length=10): + """Generate a list of n random strings.""" + return ["".join(random.choices(string.ascii_letters + string.digits, k=length)) for _ in range(n)] + + from bbot.scanner import Scanner + + scan = Scanner() + + n_items_to_add = 100000 + n_items_to_test = 100000 + bloom_filter_size = 8000000 + + # Initialize the simple bloom filter and the set + bloom_filter = scan.helpers.bloom_filter(size=bloom_filter_size) + test_set = set() + + mem_size = sys.getsizeof(bloom_filter.bit_array) + print(f"Size of bit array: {mem_size}") + + # size should be roughly 1MB + assert 900000 < mem_size < 1100000 + + # Generate random strings to add + print(f"Generating {n_items_to_add:,} items to add") + items_to_add = set(generate_random_strings(n_items_to_add)) + + # Generate random strings to test + print(f"Generating {n_items_to_test:,} items to test") + items_to_test = generate_random_strings(n_items_to_test) + + print("Adding items") + start = time.time() + for item in items_to_add: + bloom_filter.add(item) + test_set.add(hash(item)) + end = time.time() + elapsed = end - start + print(f"elapsed: {elapsed:.2f} ({int(n_items_to_test/elapsed)}/s)") + # this shouldn't take longer than 5 seconds + assert elapsed < 5 + + # make sure we have 100% accuracy + start = time.time() + for item in items_to_add: + assert item in bloom_filter + end = time.time() + elapsed = end - start + print(f"elapsed: {elapsed:.2f} ({int(n_items_to_test/elapsed)}/s)") + # this shouldn't take longer than 5 seconds + assert elapsed < 5 + + print("Measuring false positives") + # Check for false positives + false_positives = 0 + for item in items_to_test: + if bloom_filter.check(item) and hash(item) not in test_set: + false_positives += 1 + false_positive_rate = false_positives / len(items_to_test) + + print(f"False positive rate: {false_positive_rate * 100:.2f}% ({false_positives}/{len(items_to_test)})") + + # ensure false positives are less than .01 percent + assert 0 < false_positives < 10 From 6fd52718f807cc1d2b5fc6a9053da63dbb7cd32b Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 24 Apr 2024 17:16:21 -0400 Subject: [PATCH 12/24] wip dnsbrute mutations --- bbot/core/helpers/dns/brute.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bbot/core/helpers/dns/brute.py b/bbot/core/helpers/dns/brute.py index da0ca09312..d8996f2f48 100644 --- a/bbot/core/helpers/dns/brute.py +++ b/bbot/core/helpers/dns/brute.py @@ -122,7 +122,6 @@ async def _massdns(self, module, domain, subdomains, rdtype): hosts_yielded = set() async with self._dnsbrute_lock: async for line in module.run_process_live(*command, stderr=subprocess.DEVNULL, input=subdomains): - self.log.critical(line) try: j = json.loads(line) except json.decoder.JSONDecodeError: From 1e4927184f11d4fc57fd6181851f22966d5b00ca Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 25 Apr 2024 10:45:04 -0400 Subject: [PATCH 13/24] updates to bloom filter --- bbot/core/helpers/bloom.py | 64 ++++++++++----- bbot/modules/rapiddns.py | 2 +- bbot/test/test_step_1/test_bloom_filter.py | 16 ++-- poetry.lock | 94 +++++++++++++++++++++- pyproject.toml | 1 + 5 files changed, 145 insertions(+), 32 deletions(-) diff --git a/bbot/core/helpers/bloom.py b/bbot/core/helpers/bloom.py index 1f454ce0c5..357c715c03 100644 --- a/bbot/core/helpers/bloom.py +++ b/bbot/core/helpers/bloom.py @@ -1,12 +1,13 @@ +import os import mmh3 -from bitarray import bitarray +import mmap class BloomFilter: """ - Simple bloom filter implementation capable of rougly 200K lookups/s. + Simple bloom filter implementation capable of rougly 400K lookups/s. - BBOT uses bloom filters in scenarios like dns brute-forcing, where it's useful to keep track + BBOT uses bloom filters in scenarios like DNS brute-forcing, where it's useful to keep track of which mutations have been tried so far. A 100-megabyte bloom filter (800M bits) can store 10M entries with a .01% false-positive rate. @@ -14,18 +15,47 @@ class BloomFilter: 36 * 10M * 2 (key+value) == 720 megabytes. So we save rougly 7 times the space. """ - def __init__(self, size=2**16): - self.size = size - self.bit_array = bitarray(size) - self.bit_array.setall(0) # Initialize all bits to 0 + def __init__(self, size=8000000): + self.size = size # total bits + self.byte_size = (size + 7) // 8 # calculate byte size needed for the given number of bits + + # Create an anonymous mmap region, compatible with both Windows and Unix + if os.name == "nt": # Windows + # -1 indicates an anonymous memory map in Windows + self.mmap_file = mmap.mmap(-1, self.byte_size) + else: # Unix/Linux + # Use MAP_ANONYMOUS along with MAP_SHARED + self.mmap_file = mmap.mmap(-1, self.byte_size, prot=mmap.PROT_WRITE, flags=mmap.MAP_ANON | mmap.MAP_SHARED) + + self.clear_all_bits() + + def add(self, item): + for hash_value in self._hashes(item): + index = hash_value // 8 + position = hash_value % 8 + current_byte = self.mmap_file[index] + self.mmap_file[index] = current_byte | (1 << position) + + def check(self, item): + for hash_value in self._hashes(item): + index = hash_value // 8 + position = hash_value % 8 + current_byte = self.mmap_file[index] + if not (current_byte & (1 << position)): + return False + return True + + def clear_all_bits(self): + self.mmap_file.seek(0) + # Write zeros across the entire mmap length + self.mmap_file.write(b"\x00" * self.byte_size) def _hashes(self, item): - item_str = str(item).encode("utf-8") - return [ - abs(hash(item)) % self.size, - abs(mmh3.hash(item_str)) % self.size, - abs(self._fnv1a_hash(item_str)) % self.size, - ] + if not isinstance(item, bytes): + if not isinstance(item, str): + item = str(item) + item = item.encode("utf-8") + return [abs(hash(item)) % self.size, abs(mmh3.hash(item)) % self.size, abs(self._fnv1a_hash(item)) % self.size] def _fnv1a_hash(self, data): hash = 0x811C9DC5 # 2166136261 @@ -34,12 +64,8 @@ def _fnv1a_hash(self, data): hash = (hash * 0x01000193) % 2**32 # 16777619 return hash - def add(self, item): - for hash_value in self._hashes(item): - self.bit_array[hash_value] = 1 - - def check(self, item): - return all(self.bit_array[hash_value] for hash_value in self._hashes(item)) + def __del__(self): + self.mmap_file.close() def __contains__(self, item): return self.check(item) diff --git a/bbot/modules/rapiddns.py b/bbot/modules/rapiddns.py index 088288ddbf..7e634515bf 100644 --- a/bbot/modules/rapiddns.py +++ b/bbot/modules/rapiddns.py @@ -11,7 +11,7 @@ class rapiddns(subdomain_enum): async def request_url(self, query): url = f"{self.base_url}/subdomain/{self.helpers.quote(query)}?full=1#result" - response = await self.request_with_fail_count(url) + response = await self.request_with_fail_count(url, timeout=self.http_timeout + 10) return response def parse_results(self, r, query): diff --git a/bbot/test/test_step_1/test_bloom_filter.py b/bbot/test/test_step_1/test_bloom_filter.py index 089f4af82f..6d8e6918d3 100644 --- a/bbot/test/test_step_1/test_bloom_filter.py +++ b/bbot/test/test_step_1/test_bloom_filter.py @@ -1,4 +1,3 @@ -import sys import time import string import random @@ -20,13 +19,8 @@ def generate_random_strings(n, length=10): # Initialize the simple bloom filter and the set bloom_filter = scan.helpers.bloom_filter(size=bloom_filter_size) - test_set = set() - - mem_size = sys.getsizeof(bloom_filter.bit_array) - print(f"Size of bit array: {mem_size}") - # size should be roughly 1MB - assert 900000 < mem_size < 1100000 + test_set = set() # Generate random strings to add print(f"Generating {n_items_to_add:,} items to add") @@ -63,9 +57,9 @@ def generate_random_strings(n, length=10): for item in items_to_test: if bloom_filter.check(item) and hash(item) not in test_set: false_positives += 1 - false_positive_rate = false_positives / len(items_to_test) + false_positive_percent = false_positives / len(items_to_test) * 100 - print(f"False positive rate: {false_positive_rate * 100:.2f}% ({false_positives}/{len(items_to_test)})") + print(f"False positive rate: {false_positive_percent:.2f}% ({false_positives}/{len(items_to_test)})") - # ensure false positives are less than .01 percent - assert 0 < false_positives < 10 + # ensure false positives are less than .02 percent + assert false_positive_percent < 0.02 diff --git a/poetry.lock b/poetry.lock index be6fea4105..e54a2c530d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1295,6 +1295,98 @@ files = [ griffe = ">=0.44" mkdocstrings = ">=0.24.2" +[[package]] +name = "mmh3" +version = "4.1.0" +description = "Python extension for MurmurHash (MurmurHash3), a set of fast and robust hash functions." +optional = false +python-versions = "*" +files = [ + {file = "mmh3-4.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be5ac76a8b0cd8095784e51e4c1c9c318c19edcd1709a06eb14979c8d850c31a"}, + {file = "mmh3-4.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:98a49121afdfab67cd80e912b36404139d7deceb6773a83620137aaa0da5714c"}, + {file = "mmh3-4.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5259ac0535874366e7d1a5423ef746e0d36a9e3c14509ce6511614bdc5a7ef5b"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5950827ca0453a2be357696da509ab39646044e3fa15cad364eb65d78797437"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1dd0f652ae99585b9dd26de458e5f08571522f0402155809fd1dc8852a613a39"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99d25548070942fab1e4a6f04d1626d67e66d0b81ed6571ecfca511f3edf07e6"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53db8d9bad3cb66c8f35cbc894f336273f63489ce4ac416634932e3cbe79eb5b"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75da0f615eb55295a437264cc0b736753f830b09d102aa4c2a7d719bc445ec05"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b926b07fd678ea84b3a2afc1fa22ce50aeb627839c44382f3d0291e945621e1a"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c5b053334f9b0af8559d6da9dc72cef0a65b325ebb3e630c680012323c950bb6"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:5bf33dc43cd6de2cb86e0aa73a1cc6530f557854bbbe5d59f41ef6de2e353d7b"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fa7eacd2b830727ba3dd65a365bed8a5c992ecd0c8348cf39a05cc77d22f4970"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:42dfd6742b9e3eec599f85270617debfa0bbb913c545bb980c8a4fa7b2d047da"}, + {file = "mmh3-4.1.0-cp310-cp310-win32.whl", hash = "sha256:2974ad343f0d39dcc88e93ee6afa96cedc35a9883bc067febd7ff736e207fa47"}, + {file = "mmh3-4.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:74699a8984ded645c1a24d6078351a056f5a5f1fe5838870412a68ac5e28d865"}, + {file = "mmh3-4.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:f0dc874cedc23d46fc488a987faa6ad08ffa79e44fb08e3cd4d4cf2877c00a00"}, + {file = "mmh3-4.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3280a463855b0eae64b681cd5b9ddd9464b73f81151e87bb7c91a811d25619e6"}, + {file = "mmh3-4.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:97ac57c6c3301769e757d444fa7c973ceb002cb66534b39cbab5e38de61cd896"}, + {file = "mmh3-4.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a7b6502cdb4dbd880244818ab363c8770a48cdccecf6d729ade0241b736b5ec0"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52ba2da04671a9621580ddabf72f06f0e72c1c9c3b7b608849b58b11080d8f14"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a5fef4c4ecc782e6e43fbeab09cff1bac82c998a1773d3a5ee6a3605cde343e"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5135358a7e00991f73b88cdc8eda5203bf9de22120d10a834c5761dbeb07dd13"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cff9ae76a54f7c6fe0167c9c4028c12c1f6de52d68a31d11b6790bb2ae685560"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6f02576a4d106d7830ca90278868bf0983554dd69183b7bbe09f2fcd51cf54f"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:073d57425a23721730d3ff5485e2da489dd3c90b04e86243dd7211f889898106"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:71e32ddec7f573a1a0feb8d2cf2af474c50ec21e7a8263026e8d3b4b629805db"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7cbb20b29d57e76a58b40fd8b13a9130db495a12d678d651b459bf61c0714cea"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:a42ad267e131d7847076bb7e31050f6c4378cd38e8f1bf7a0edd32f30224d5c9"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a013979fc9390abadc445ea2527426a0e7a4495c19b74589204f9b71bcaafeb"}, + {file = "mmh3-4.1.0-cp311-cp311-win32.whl", hash = "sha256:1d3b1cdad7c71b7b88966301789a478af142bddcb3a2bee563f7a7d40519a00f"}, + {file = "mmh3-4.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0dc6dc32eb03727467da8e17deffe004fbb65e8b5ee2b502d36250d7a3f4e2ec"}, + {file = "mmh3-4.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:9ae3a5c1b32dda121c7dc26f9597ef7b01b4c56a98319a7fe86c35b8bc459ae6"}, + {file = "mmh3-4.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0033d60c7939168ef65ddc396611077a7268bde024f2c23bdc283a19123f9e9c"}, + {file = "mmh3-4.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d6af3e2287644b2b08b5924ed3a88c97b87b44ad08e79ca9f93d3470a54a41c5"}, + {file = "mmh3-4.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d82eb4defa245e02bb0b0dc4f1e7ee284f8d212633389c91f7fba99ba993f0a2"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba245e94b8d54765e14c2d7b6214e832557e7856d5183bc522e17884cab2f45d"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb04e2feeabaad6231e89cd43b3d01a4403579aa792c9ab6fdeef45cc58d4ec0"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e3b1a27def545ce11e36158ba5d5390cdbc300cfe456a942cc89d649cf7e3b2"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce0ab79ff736d7044e5e9b3bfe73958a55f79a4ae672e6213e92492ad5e734d5"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b02268be6e0a8eeb8a924d7db85f28e47344f35c438c1e149878bb1c47b1cd3"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:deb887f5fcdaf57cf646b1e062d56b06ef2f23421c80885fce18b37143cba828"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:99dd564e9e2b512eb117bd0cbf0f79a50c45d961c2a02402787d581cec5448d5"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:08373082dfaa38fe97aa78753d1efd21a1969e51079056ff552e687764eafdfe"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:54b9c6a2ea571b714e4fe28d3e4e2db37abfd03c787a58074ea21ee9a8fd1740"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a7b1edf24c69e3513f879722b97ca85e52f9032f24a52284746877f6a7304086"}, + {file = "mmh3-4.1.0-cp312-cp312-win32.whl", hash = "sha256:411da64b951f635e1e2284b71d81a5a83580cea24994b328f8910d40bed67276"}, + {file = "mmh3-4.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:bebc3ecb6ba18292e3d40c8712482b4477abd6981c2ebf0e60869bd90f8ac3a9"}, + {file = "mmh3-4.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:168473dd608ade6a8d2ba069600b35199a9af837d96177d3088ca91f2b3798e3"}, + {file = "mmh3-4.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:372f4b7e1dcde175507640679a2a8790185bb71f3640fc28a4690f73da986a3b"}, + {file = "mmh3-4.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:438584b97f6fe13e944faf590c90fc127682b57ae969f73334040d9fa1c7ffa5"}, + {file = "mmh3-4.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6e27931b232fc676675fac8641c6ec6b596daa64d82170e8597f5a5b8bdcd3b6"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:571a92bad859d7b0330e47cfd1850b76c39b615a8d8e7aa5853c1f971fd0c4b1"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a69d6afe3190fa08f9e3a58e5145549f71f1f3fff27bd0800313426929c7068"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afb127be0be946b7630220908dbea0cee0d9d3c583fa9114a07156f98566dc28"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:940d86522f36348ef1a494cbf7248ab3f4a1638b84b59e6c9e90408bd11ad729"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3dcccc4935686619a8e3d1f7b6e97e3bd89a4a796247930ee97d35ea1a39341"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:01bb9b90d61854dfc2407c5e5192bfb47222d74f29d140cb2dd2a69f2353f7cc"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:bcb1b8b951a2c0b0fb8a5426c62a22557e2ffc52539e0a7cc46eb667b5d606a9"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6477a05d5e5ab3168e82e8b106e316210ac954134f46ec529356607900aea82a"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:da5892287e5bea6977364b15712a2573c16d134bc5fdcdd4cf460006cf849278"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:99180d7fd2327a6fffbaff270f760576839dc6ee66d045fa3a450f3490fda7f5"}, + {file = "mmh3-4.1.0-cp38-cp38-win32.whl", hash = "sha256:9b0d4f3949913a9f9a8fb1bb4cc6ecd52879730aab5ff8c5a3d8f5b593594b73"}, + {file = "mmh3-4.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:598c352da1d945108aee0c3c3cfdd0e9b3edef74108f53b49d481d3990402169"}, + {file = "mmh3-4.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:475d6d1445dd080f18f0f766277e1237fa2914e5fe3307a3b2a3044f30892103"}, + {file = "mmh3-4.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5ca07c41e6a2880991431ac717c2a049056fff497651a76e26fc22224e8b5732"}, + {file = "mmh3-4.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0ebe052fef4bbe30c0548d12ee46d09f1b69035ca5208a7075e55adfe091be44"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eaefd42e85afb70f2b855a011f7b4d8a3c7e19c3f2681fa13118e4d8627378c5"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0ae43caae5a47afe1b63a1ae3f0986dde54b5fb2d6c29786adbfb8edc9edfb"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6218666f74c8c013c221e7f5f8a693ac9cf68e5ac9a03f2373b32d77c48904de"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac59294a536ba447b5037f62d8367d7d93b696f80671c2c45645fa9f1109413c"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:086844830fcd1e5c84fec7017ea1ee8491487cfc877847d96f86f68881569d2e"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e42b38fad664f56f77f6fbca22d08450f2464baa68acdbf24841bf900eb98e87"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d08b790a63a9a1cde3b5d7d733ed97d4eb884bfbc92f075a091652d6bfd7709a"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:73ea4cc55e8aea28c86799ecacebca09e5f86500414870a8abaedfcbaf74d288"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:f90938ff137130e47bcec8dc1f4ceb02f10178c766e2ef58a9f657ff1f62d124"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:aa1f13e94b8631c8cd53259250556edcf1de71738936b60febba95750d9632bd"}, + {file = "mmh3-4.1.0-cp39-cp39-win32.whl", hash = "sha256:a3b680b471c181490cf82da2142029edb4298e1bdfcb67c76922dedef789868d"}, + {file = "mmh3-4.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:fefef92e9c544a8dbc08f77a8d1b6d48006a750c4375bbcd5ff8199d761e263b"}, + {file = "mmh3-4.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:8e2c1f6a2b41723a4f82bd5a762a777836d29d664fc0095f17910bea0adfd4a6"}, + {file = "mmh3-4.1.0.tar.gz", hash = "sha256:a1cf25348b9acd229dda464a094d6170f47d2850a1fcb762a3b6172d2ce6ca4a"}, +] + +[package.extras] +test = ["mypy (>=1.0)", "pytest (>=7.0.0)"] + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -2637,4 +2729,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "ed8bb07e4ff5a5f665402db33f9016409547bef1ccb6a8c2c626c44fde075abb" +content-hash = "baf84bb8d915bbcec435bf66a227dc0aac2dad1acc2e3f7028a19cd23f87bf1b" diff --git a/pyproject.toml b/pyproject.toml index 7ba00c488c..4ca3941884 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ regex = "^2024.4.16" unidecode = "^1.3.8" radixtarget = "^1.0.0.15" cloudcheck = "^5.0.0.350" +mmh3 = "^4.1.0" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From 949f9c79737decf603286fcdf3e0102f35fce219 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 25 Apr 2024 11:30:18 -0400 Subject: [PATCH 14/24] better error handling in intercept modules --- bbot/modules/base.py | 4 ++++ bbot/test/test_step_1/test_target.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bbot/modules/base.py b/bbot/modules/base.py index c102b138d4..a55ff2ae46 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -1481,6 +1481,10 @@ async def _worker(self): except asyncio.CancelledError: self.log.trace("Worker cancelled") raise + except BaseException as e: + self.critical(f"Critical failure in intercept module {self.name}: {e}") + self.critical(traceback.format_exc()) + self.scan.stop() self.log.trace(f"Worker stopped") async def get_incoming_event(self): diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index ed5c1b7efb..7d8117d522 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -51,12 +51,12 @@ def test_target(bbot_scanner): assert not "www.evilcorp.com" in strict_target target = Target() - target.add_target("evilcorp.com") + target.add("evilcorp.com") assert not "com" in target assert "evilcorp.com" in target assert "www.evilcorp.com" in target strict_target = Target(strict_scope=True) - strict_target.add_target("evilcorp.com") + strict_target.add("evilcorp.com") assert not "com" in strict_target assert "evilcorp.com" in strict_target assert not "www.evilcorp.com" in strict_target From c44be0b7dad6b48779ec135480564839cea99d2f Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 25 Apr 2024 15:53:36 -0400 Subject: [PATCH 15/24] add dnsbrute_mutations module --- bbot/modules/dnsbrute_mutations.py | 140 ++++++++++++++++++ .../test_module_dnsbrute_mutations.py | 70 +++++++++ 2 files changed, 210 insertions(+) create mode 100644 bbot/modules/dnsbrute_mutations.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_dnsbrute_mutations.py diff --git a/bbot/modules/dnsbrute_mutations.py b/bbot/modules/dnsbrute_mutations.py new file mode 100644 index 0000000000..08e00d8781 --- /dev/null +++ b/bbot/modules/dnsbrute_mutations.py @@ -0,0 +1,140 @@ +from bbot.modules.base import BaseModule + + +class dnsbrute_mutations(BaseModule): + flags = ["subdomain-enum", "passive", "aggressive", "slow"] + watched_events = ["DNS_NAME"] + produced_events = ["DNS_NAME"] + meta = {"description": "Brute-force subdomains with massdns + target-specific mutations"} + options = { + "max_mutations": 100, + } + options_desc = { + "max_mutations": 100, + } + deps_common = ["massdns"] + _qsize = 10000 + + async def setup(self): + self.found = {} + self.source_events = self.helpers.make_target() + self.max_mutations = self.config.get("max_mutations", 500) + # 800M bits == 100MB bloom filter == 10M entries before false positives start emerging + self.mutations_tried = self.helpers.bloom_filter(800000000) + self._mutation_run = 1 + return True + + async def handle_event(self, event): + # here we don't brute-force, we just add the subdomain to our end-of-scan TODO + self.add_found(event) + + def add_found(self, event): + self.source_events.add(event) + host = str(event.host) + if self.helpers.is_subdomain(host): + subdomain, domain = host.split(".", 1) + if not self.helpers.dns.brute.has_excessive_digits(subdomain): + try: + self.found[domain].add(subdomain) + except KeyError: + self.found[domain] = {subdomain} + + async def finish(self): + found = sorted(self.found.items(), key=lambda x: len(x[-1]), reverse=True) + # if we have a lot of rounds to make, don't try mutations on less-populated domains + trimmed_found = [] + if found: + avg_subdomains = sum([len(subdomains) for domain, subdomains in found[:50]]) / len(found[:50]) + for i, (domain, subdomains) in enumerate(found): + # accept domains that are in the top 50 or have more than 5 percent of the average number of subdomains + if i < 50 or (len(subdomains) > 1 and len(subdomains) >= (avg_subdomains * 0.05)): + trimmed_found.append((domain, subdomains)) + else: + self.verbose( + f"Skipping mutations on {domain} because it only has {len(subdomains):,} subdomain(s) (avg: {avg_subdomains:,})" + ) + + base_mutations = set() + found_mutations = False + try: + for i, (domain, subdomains) in enumerate(trimmed_found): + self.verbose(f"{domain} has {len(subdomains):,} subdomains") + # keep looping as long as we're finding things + while 1: + query = domain + + mutations = set(base_mutations) + + def add_mutation(m): + h = f"{m}.{domain}" + if h not in self.mutations_tried: + self.mutations_tried.add(h) + mutations.add(m) + + # try every subdomain everywhere else + for _domain, _subdomains in found: + if _domain == domain: + continue + for s in _subdomains: + first_segment = s.split(".")[0] + # skip stuff with lots of numbers (e.g. PTRs) + if self.helpers.dns.brute.has_excessive_digits(first_segment): + continue + add_mutation(first_segment) + for word in self.helpers.extract_words( + first_segment, word_regexes=self.helpers.word_cloud.dns_mutator.extract_word_regexes + ): + add_mutation(word) + + # numbers + devops mutations + for mutation in self.helpers.word_cloud.mutations( + subdomains, cloud=False, numbers=3, number_padding=1 + ): + for delimiter in ("", ".", "-"): + m = delimiter.join(mutation).lower() + add_mutation(m) + + # special dns mutator + for subdomain in self.helpers.word_cloud.dns_mutator.mutations( + subdomains, max_mutations=self.max_mutations + ): + add_mutation(subdomain) + + if mutations: + self.info(f"Trying {len(mutations):,} mutations against {domain} ({i+1}/{len(trimmed_found)})") + results = await self.helpers.dns.brute(self, query, mutations) + for hostname in results: + source_event = self.source_events.get(hostname) + if source_event is None: + self.warning(f"Could not correlate source event from: {hostname}") + self.warning(self.source_events._radix.dns_tree.root.children) + self.warning(self.source_events._radix.dns_tree.root.children["com"].children) + self.warning( + self.source_events._radix.dns_tree.root.children["com"].children["tesla"].children + ) + source_event = self.scan.root_event + await self.emit_event( + hostname, + "DNS_NAME", + source=source_event, + tags=[f"mutation-{self._mutation_run}"], + abort_if=self.abort_if, + ) + if results: + found_mutations = True + continue + break + except AssertionError as e: + self.warning(e) + + if found_mutations: + self._mutation_run += 1 + + def abort_if(self, event): + if not event.scope_distance == 0: + return True, "event is not in scope" + if "wildcard" in event.tags: + return True, "event is a wildcard" + if "unresolved" in event.tags: + return True, "event is unresolved" + return False, "" diff --git a/bbot/test/test_step_2/module_tests/test_module_dnsbrute_mutations.py b/bbot/test/test_step_2/module_tests/test_module_dnsbrute_mutations.py new file mode 100644 index 0000000000..2a56b2b65e --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_dnsbrute_mutations.py @@ -0,0 +1,70 @@ +from .base import ModuleTestBase + + +class TestDnsbrute_mutations(ModuleTestBase): + targets = [ + "blacklanternsecurity.com", + "rrrr.blacklanternsecurity.com", + "asdff-ffdsa.blacklanternsecurity.com", + "hmmmm.test1.blacklanternsecurity.com", + "uuuuu.test2.blacklanternsecurity.com", + ] + + async def setup_after_prep(self, module_test): + + old_run_live = module_test.scan.helpers.run_live + + async def new_run_live(*command, check=False, text=True, **kwargs): + if "massdns" in command[:2]: + _input = [l async for l in kwargs["input"]] + if "rrrr-test.blacklanternsecurity.com" in _input: + yield """{"name": "rrrr-test.blacklanternsecurity.com.", "type": "A", "class": "IN", "status": "NOERROR", "rx_ts": 1713974911725326170, "data": {"answers": [{"ttl": 86400, "type": "A", "class": "IN", "name": "rrrr-test.blacklanternsecurity.com.", "data": "1.2.3.4."}]}, "flags": ["rd", "ra"], "resolver": "195.226.187.130:53", "proto": "UDP"}""" + if "rrrr-ffdsa.blacklanternsecurity.com" in _input: + yield """{"name": "rrrr-ffdsa.blacklanternsecurity.com.", "type": "A", "class": "IN", "status": "NOERROR", "rx_ts": 1713974911725326170, "data": {"answers": [{"ttl": 86400, "type": "A", "class": "IN", "name": "rrrr-ffdsa.blacklanternsecurity.com.", "data": "1.2.3.4."}]}, "flags": ["rd", "ra"], "resolver": "195.226.187.130:53", "proto": "UDP"}""" + if "hmmmm.test2.blacklanternsecurity.com" in _input: + yield """{"name": "hmmmm.test2.blacklanternsecurity.com.", "type": "A", "class": "IN", "status": "NOERROR", "rx_ts": 1713974911725326170, "data": {"answers": [{"ttl": 86400, "type": "A", "class": "IN", "name": "hmmmm.test2.blacklanternsecurity.com.", "data": "1.2.3.4."}]}, "flags": ["rd", "ra"], "resolver": "195.226.187.130:53", "proto": "UDP"}""" + else: + async for _ in old_run_live(*command, check=False, text=True, **kwargs): + yield _ + + module_test.monkeypatch.setattr(module_test.scan.helpers, "run_live", new_run_live) + + await module_test.mock_dns( + { + # targets + "rrrr.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, + "asdff-ffdsa.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, + "hmmmm.test1.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, + "uuuuu.test2.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, + # devops mutation + "rrrr-test.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, + # target-specific dns mutation + "rrrr-ffdsa.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, + # subdomain from one subdomain on a different subdomain + "hmmmm.test2.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, + } + ) + + def check(self, module_test, events): + assert len(events) == 9 + assert 1 == len( + [ + e + for e in events + if e.data == "rrrr-test.blacklanternsecurity.com" and str(e.module) == "dnsbrute_mutations" + ] + ), "Failed to find devops mutation (word_cloud)" + assert 1 == len( + [ + e + for e in events + if e.data == "rrrr-ffdsa.blacklanternsecurity.com" and str(e.module) == "dnsbrute_mutations" + ] + ), "Failed to find target-specific mutation (word_cloud.dns_mutator)" + assert 1 == len( + [ + e + for e in events + if e.data == "hmmmm.test2.blacklanternsecurity.com" and str(e.module) == "dnsbrute_mutations" + ] + ), "Failed to find subdomain taken from another subdomain" From e94556b594db8551f3bd3453418df10a30bf1338 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 25 Apr 2024 16:03:59 -0400 Subject: [PATCH 16/24] fix tests --- bbot/modules/dnsbrute_mutations.py | 2 +- bbot/test/test_step_1/test_cli.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/modules/dnsbrute_mutations.py b/bbot/modules/dnsbrute_mutations.py index 08e00d8781..06fd036892 100644 --- a/bbot/modules/dnsbrute_mutations.py +++ b/bbot/modules/dnsbrute_mutations.py @@ -10,7 +10,7 @@ class dnsbrute_mutations(BaseModule): "max_mutations": 100, } options_desc = { - "max_mutations": 100, + "max_mutations": "Maximum number of target-specific mutations to try per subdomain", } deps_common = ["massdns"] _qsize = 10000 diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index 32e761679f..32e9bcadbd 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -253,7 +253,7 @@ async def test_cli_args(monkeypatch, caplog, clean_default_config): # list modules by flag + excluded module caplog.clear() assert not caplog.text - monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-em", "dnsbrute", "-l"]) + monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-em", "dnsbrute", "dnsbrute_mutations", "-l"]) result = await cli._main() assert result == None assert not "| dnsbrute" in caplog.text From a2669b0b825f81546af63955bc16a0c8e7a005de Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 25 Apr 2024 16:52:07 -0400 Subject: [PATCH 17/24] fix dnsbrute tests --- bbot/test/test_step_2/module_tests/test_module_dnsbrute.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py b/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py index 2d301da94d..fab736cca3 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py +++ b/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py @@ -11,7 +11,7 @@ async def setup_after_prep(self, module_test): async def new_run_live(*command, check=False, text=True, **kwargs): if "massdns" in command[:2]: - yield """{"name": "www-test.blacklanternsecurity.com.", "type": "A", "class": "IN", "status": "NOERROR", "rx_ts": 1713974911725326170, "data": {"answers": [{"ttl": 86400, "type": "A", "class": "IN", "name": "www-test.blacklanternsecurity.com.", "data": "1.2.3.4."}]}, "flags": ["rd", "ra"], "resolver": "195.226.187.130:53", "proto": "UDP"}""" + yield """{"name": "asdf.blacklanternsecurity.com.", "type": "A", "class": "IN", "status": "NOERROR", "rx_ts": 1713974911725326170, "data": {"answers": [{"ttl": 86400, "type": "A", "class": "IN", "name": "asdf.blacklanternsecurity.com.", "data": "1.2.3.4."}]}, "flags": ["rd", "ra"], "resolver": "195.226.187.130:53", "proto": "UDP"}""" else: async for _ in old_run_live(*command, check=False, text=True, **kwargs): yield _ @@ -20,7 +20,7 @@ async def new_run_live(*command, check=False, text=True, **kwargs): await module_test.mock_dns( { - "www-test.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, + "asdf.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, } ) @@ -74,5 +74,5 @@ async def new_run_live(*command, check=False, text=True, **kwargs): def check(self, module_test, events): assert len(events) == 3 assert 1 == len( - [e for e in events if e.data == "www-test.blacklanternsecurity.com" and str(e.module) == "dnsbrute"] + [e for e in events if e.data == "asdf.blacklanternsecurity.com" and str(e.module) == "dnsbrute"] ) From c5de1360e8e5ccba04b23035f675a529282b7dc2 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 25 Apr 2024 17:02:37 -0400 Subject: [PATCH 18/24] remove debug message --- bbot/modules/dnsbrute.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bbot/modules/dnsbrute.py b/bbot/modules/dnsbrute.py index 2df3a48d60..8dc0140846 100644 --- a/bbot/modules/dnsbrute.py +++ b/bbot/modules/dnsbrute.py @@ -43,7 +43,6 @@ async def eligible_for_enumeration(self, event): return eligible, reason async def handle_event(self, event): - self.hugewarning(event) query = self.make_query(event) self.info(f"Brute-forcing subdomains for {query} (source: {event.data})") for hostname in await self.helpers.dns.brute(self, query, self.subdomain_list): From 5f3948a6e7f8e13505f220ca9579c763d7e832ab Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 29 Apr 2024 16:14:18 -0400 Subject: [PATCH 19/24] fix dnsbrute tests, fix https://github.com/blacklanternsecurity/bbot/issues/1315 --- bbot/modules/dnsbrute.py | 6 +- bbot/modules/templates/subdomain_enum.py | 41 +++++-------- .../module_tests/test_module_dnsbrute.py | 61 ++++++++++++------- 3 files changed, 57 insertions(+), 51 deletions(-) diff --git a/bbot/modules/dnsbrute.py b/bbot/modules/dnsbrute.py index 8dc0140846..429a556207 100644 --- a/bbot/modules/dnsbrute.py +++ b/bbot/modules/dnsbrute.py @@ -16,7 +16,7 @@ class dnsbrute(subdomain_enum): } deps_common = ["massdns"] reject_wildcards = "strict" - dedup_strategy = "parent_domain" + dedup_strategy = "lowest_parent" _qsize = 10000 async def setup(self): @@ -25,8 +25,8 @@ async def setup(self): self.subdomain_list = set(self.helpers.read_file(self.subdomain_file)) return await super().setup() - async def eligible_for_enumeration(self, event): - eligible, reason = await super().eligible_for_enumeration(event) + async def filter_event(self, event): + eligible, reason = await super().filter_event(event) query = self.make_query(event) # limit brute force depth diff --git a/bbot/modules/templates/subdomain_enum.py b/bbot/modules/templates/subdomain_enum.py index 3c65dfa34b..18243c393b 100644 --- a/bbot/modules/templates/subdomain_enum.py +++ b/bbot/modules/templates/subdomain_enum.py @@ -26,28 +26,9 @@ class subdomain_enum(BaseModule): # how to deduplicate incoming events # options: - # "root_domain": if a dns name has already been tried, don't try any of its children - # "parent_domain": always try a domain unless its direct parent has already been tried - dedup_strategy = "root_domain" - - async def setup(self): - strict_scope = self.dedup_strategy == "parent_domain" - self.processed = self.helpers.make_target(strict_scope=strict_scope) - return True - - async def filter_event(self, event): - """ - This filter_event is used across many modules - """ - query = self.make_query(event) - # reject if already processed - if query in self.processed: - return False, "Event was already processed" - eligible, reason = await self.eligible_for_enumeration(event) - if eligible: - self.processed.add(query) - return True, reason - return False, reason + # "highest_parent": dedupe by highest parent (highest parent of www.api.test.evilcorp.com is evilcorp.com) + # "lowest_parent": dedupe by lowest parent (lowest parent of www.api.test.evilcorp.com is api.test.evilcorp.com) + dedup_strategy = "highest_parent" async def handle_event(self, event): query = self.make_query(event) @@ -68,10 +49,18 @@ async def request_url(self, query): return await self.request_with_fail_count(url) def make_query(self, event): - if "target" in event.tags: - query = str(event.data) + query = event.data + parents = list(self.helpers.domain_parents(event.data)) + if self.dedup_strategy == "highest_parent": + parents = list(reversed(parents)) + elif self.dedup_strategy == "lowest_parent": + pass else: - query = self.helpers.parent_domain(event.data).lower() + raise ValueError('self.dedup_strategy attribute must be set to either "highest_parent" or "lowest_parent"') + for p in parents: + if self.scan.in_scope(p): + query = p + break return ".".join([s for s in query.split(".") if s != "_wildcard"]) def parse_results(self, r, query=None): @@ -114,7 +103,7 @@ async def _is_wildcard(self, query): return True return False - async def eligible_for_enumeration(self, event): + async def filter_event(self, event): query = self.make_query(event) # check if wildcard is_wildcard = await self._is_wildcard(query) diff --git a/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py b/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py index fab736cca3..bdbd2f6cb5 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py +++ b/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py @@ -3,6 +3,7 @@ class TestDnsbrute(ModuleTestBase): subdomain_wordlist = tempwordlist(["www", "asdf"]) + blacklist = ["api.asdf.blacklanternsecurity.com"] config_overrides = {"modules": {"dnsbrute": {"wordlist": str(subdomain_wordlist), "max_depth": 3}}} async def setup_after_prep(self, module_test): @@ -24,52 +25,68 @@ async def new_run_live(*command, check=False, text=True, **kwargs): } ) + module = module_test.module + scan = module_test.scan + + # test query logic + event = scan.make_event("blacklanternsecurity.com", "DNS_NAME", dummy=True) + assert module.make_query(event) == "blacklanternsecurity.com" + event = scan.make_event("asdf.blacklanternsecurity.com", "DNS_NAME", dummy=True) + assert module.make_query(event) == "blacklanternsecurity.com" + event = scan.make_event("api.asdf.blacklanternsecurity.com", "DNS_NAME", dummy=True) + assert module.make_query(event) == "asdf.blacklanternsecurity.com" + event = scan.make_event("test.api.asdf.blacklanternsecurity.com", "DNS_NAME", dummy=True) + assert module.make_query(event) == "asdf.blacklanternsecurity.com" + + assert module.dedup_strategy == "lowest_parent" + module.dedup_strategy = "highest_parent" + event = scan.make_event("blacklanternsecurity.com", "DNS_NAME", dummy=True) + assert module.make_query(event) == "blacklanternsecurity.com" + event = scan.make_event("asdf.blacklanternsecurity.com", "DNS_NAME", dummy=True) + assert module.make_query(event) == "blacklanternsecurity.com" + event = scan.make_event("api.asdf.blacklanternsecurity.com", "DNS_NAME", dummy=True) + assert module.make_query(event) == "blacklanternsecurity.com" + event = scan.make_event("test.api.asdf.blacklanternsecurity.com", "DNS_NAME", dummy=True) + assert module.make_query(event) == "blacklanternsecurity.com" + module.dedup_strategy = "lowest_parent" + # test recursive brute-force event filtering - event = module_test.scan.make_event("evilcorp.com", "DNS_NAME", source=module_test.scan.root_event) + event = module_test.scan.make_event("blacklanternsecurity.com", "DNS_NAME", source=module_test.scan.root_event) event.scope_distance = 0 result, reason = await module_test.module.filter_event(event) assert result == True - assert "evilcorp.com" in module_test.module.processed - assert not "com" in module_test.module.processed - event = module_test.scan.make_event("evilcorp.com", "DNS_NAME", source=module_test.scan.root_event) - event.scope_distance = 0 - result, reason = await module_test.module.filter_event(event) - assert result == False - assert reason == "Event was already processed" - event = module_test.scan.make_event("www.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event) - event.scope_distance = 0 - result, reason = await module_test.module.filter_event(event) - assert result == False - assert reason == "Event was already processed" - event = module_test.scan.make_event("test.www.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event) + event = module_test.scan.make_event( + "www.blacklanternsecurity.com", "DNS_NAME", source=module_test.scan.root_event + ) event.scope_distance = 0 result, reason = await module_test.module.filter_event(event) assert result == True - event = module_test.scan.make_event("test.www.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event) + event = module_test.scan.make_event( + "test.www.blacklanternsecurity.com", "DNS_NAME", source=module_test.scan.root_event + ) event.scope_distance = 0 result, reason = await module_test.module.filter_event(event) - assert result == False - assert reason == "Event was already processed" + assert result == True event = module_test.scan.make_event( - "asdf.test.www.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event + "asdf.test.www.blacklanternsecurity.com", "DNS_NAME", source=module_test.scan.root_event ) event.scope_distance = 0 result, reason = await module_test.module.filter_event(event) assert result == True event = module_test.scan.make_event( - "wat.asdf.test.www.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event + "wat.asdf.test.www.blacklanternsecurity.com", "DNS_NAME", source=module_test.scan.root_event ) event.scope_distance = 0 result, reason = await module_test.module.filter_event(event) assert result == False - assert reason == f"subdomain depth of *.asdf.test.www.evilcorp.com (4) > max_depth (3)" + assert reason == f"subdomain depth of *.asdf.test.www.blacklanternsecurity.com (4) > max_depth (3)" event = module_test.scan.make_event( - "hmmm.ptr1234.evilcorp.com", "DNS_NAME", source=module_test.scan.root_event + "hmmm.ptr1234.blacklanternsecurity.com", "DNS_NAME", source=module_test.scan.root_event ) event.scope_distance = 0 result, reason = await module_test.module.filter_event(event) assert result == False - assert reason == f'"ptr1234.evilcorp.com" looks like an autogenerated PTR' + assert reason == f'"ptr1234.blacklanternsecurity.com" looks like an autogenerated PTR' def check(self, module_test, events): assert len(events) == 3 From 6414d632d39e407d204f566bbbc9d53f971fdc1d Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 1 May 2024 08:47:01 -0400 Subject: [PATCH 20/24] update documentation --- bbot/core/helpers/dns/brute.py | 7 ++++--- bbot/core/helpers/wordcloud.py | 2 +- bbot/defaults.yml | 4 ++++ docs/scanning/tips_and_tricks.md | 4 ++-- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/bbot/core/helpers/dns/brute.py b/bbot/core/helpers/dns/brute.py index d8996f2f48..c34e966108 100644 --- a/bbot/core/helpers/dns/brute.py +++ b/bbot/core/helpers/dns/brute.py @@ -23,7 +23,7 @@ def __init__(self, parent_helper): self.parent_helper = parent_helper self.log = logging.getLogger("bbot.helper.dns.brute") self.num_canaries = 100 - self.max_resolvers = 1000 + self.max_resolvers = self.parent_helper.config.get("dns", {}).get("brute_threads", 1000) self.devops_mutations = list(self.parent_helper.word_cloud.devops_mutations) self.digit_regex = self.parent_helper.re.compile(r"\d+") self._resolver_file = None @@ -142,8 +142,9 @@ async def _massdns(self, module, domain, subdomains, rdtype): async def gen_subdomains(self, prefixes, domain): for p in prefixes: - d = f"{p}.{domain}" - yield d + if domain: + p = f"{p}.{domain}" + yield p async def resolver_file(self): if self._resolver_file is None: diff --git a/bbot/core/helpers/wordcloud.py b/bbot/core/helpers/wordcloud.py index 5eafb00c59..fbd4e75930 100644 --- a/bbot/core/helpers/wordcloud.py +++ b/bbot/core/helpers/wordcloud.py @@ -451,7 +451,7 @@ def add_word(self, word): class DNSMutator(Mutator): """ - DNS-specific mutator used by the `massdns` module to generate target-specific subdomain mutations. + DNS-specific mutator used by the `dnsbrute_mutations` module to generate target-specific subdomain mutations. This class extends the Mutator base class to add DNS-specific logic for generating subdomain mutations based on input words. It utilizes custom word extraction patterns diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 4b9b5210d1..42cd265f0d 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -24,6 +24,10 @@ deps: ffuf: version: "2.1.0" +dns: + # Number of concurrent massdns lookups (-s) + brute_threads: 1000 + ### WEB SPIDER ### # Set the maximum number of HTTP links that can be followed in a row (0 == no spidering allowed) diff --git a/docs/scanning/tips_and_tricks.md b/docs/scanning/tips_and_tricks.md index 885e461dc0..f019f742a2 100644 --- a/docs/scanning/tips_and_tricks.md +++ b/docs/scanning/tips_and_tricks.md @@ -30,13 +30,13 @@ To change the number of instances, you can set a module's `max_event_handlers` i bbot -t evilcorp.com -m baddns -c modules.baddns.max_event_handlers=20 ``` -### Boost Massdns Thread Count +### Boost DNS Brute-force Speed If you have a fast internet connection or are running BBOT from a cloud VM, you can speed up subdomain enumeration by cranking the threads for `massdns`. The default is `1000`, which is about 1MB/s of DNS traffic: ```bash # massdns with 5000 resolvers, about 5MB/s -bbot -t evilcorp.com -f subdomain-enum -c modules.massdns.max_resolvers=5000 +bbot -t evilcorp.com -f subdomain-enum -c dns.brute_threads=5000 ``` ### Web Spider From 5c9c1b8ad429d1a36399e8c876507dbaf28880da Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Wed, 1 May 2024 23:58:34 -0400 Subject: [PATCH 21/24] target perf optimization --- bbot/modules/dnsbrute_mutations.py | 2 +- bbot/modules/nmap.py | 2 +- bbot/scanner/target.py | 32 +++++++++++++++++++----------- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/bbot/modules/dnsbrute_mutations.py b/bbot/modules/dnsbrute_mutations.py index 06fd036892..a02ef7e9e0 100644 --- a/bbot/modules/dnsbrute_mutations.py +++ b/bbot/modules/dnsbrute_mutations.py @@ -104,7 +104,7 @@ def add_mutation(m): self.info(f"Trying {len(mutations):,} mutations against {domain} ({i+1}/{len(trimmed_found)})") results = await self.helpers.dns.brute(self, query, mutations) for hostname in results: - source_event = self.source_events.get(hostname) + source_event = self.source_events.get_host(hostname) if source_event is None: self.warning(f"Could not correlate source event from: {hostname}") self.warning(self.source_events._radix.dns_tree.root.children) diff --git a/bbot/modules/nmap.py b/bbot/modules/nmap.py index ccdb0974e4..4374155ab5 100644 --- a/bbot/modules/nmap.py +++ b/bbot/modules/nmap.py @@ -43,7 +43,7 @@ async def handle_batch(self, *events): for host in self.parse_nmap_xml(output_file): source_event = None for h in [host.address] + host.hostnames: - source_event = target.get(h) + source_event = target.get_host(h) if source_event is not None: break if source_event is None: diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index 878e80846f..9cca55ecce 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -7,6 +7,7 @@ from bbot.errors import * from bbot.modules.base import BaseModule +from bbot.core.helpers.misc import make_ip_type from bbot.core.event import make_event, is_event log = logging.getLogger("bbot.core.target") @@ -212,20 +213,27 @@ def get(self, host): """ try: - other = make_event(host, dummy=True) + event = make_event(host, dummy=True) except ValidationError: return - if other.host: - with suppress(KeyError, StopIteration): - result = self._radix.search(other.host) - if result is not None: - for event in result: - # if the result is a dns name and strict scope is enabled - if isinstance(event.host, str) and self.strict_scope: - # if the result doesn't exactly equal the host, abort - if event.host != other.host: - return - return event + if event.host: + return self.get_host(event.host) + + def get_host(self, host): + """ + A more efficient version of .get() that only accepts hostnames and IP addresses + """ + host = make_ip_type(host) + with suppress(KeyError, StopIteration): + result = self._radix.search(host) + if result is not None: + for event in result: + # if the result is a dns name and strict scope is enabled + if isinstance(event.host, str) and self.strict_scope: + # if the result doesn't exactly equal the host, abort + if event.host != host: + return + return event def _add_event(self, event): radix_data = self._radix.search(event.host) From 47161e5d5a225c6edce5c5bfd1cb1cba647a5a32 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sat, 4 May 2024 20:22:48 -0400 Subject: [PATCH 22/24] fix cli tests --- bbot/test/test_step_1/test_cli.py | 160 +++++++++++++++--------------- 1 file changed, 80 insertions(+), 80 deletions(-) diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index 524557facf..63eb8334bc 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -65,148 +65,148 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config): assert len(out.splitlines()) == 1 assert out.count(".") > 1 + # list modules + monkeypatch.setattr("sys.argv", ["bbot", "--list-modules"]) + result = await cli._main() + assert result == None + out, err = capsys.readouterr() # internal modules - assert "| excavate" in caplog.text + assert "| excavate" in out # output modules - assert "| csv" in caplog.text + assert "| csv" in out # scan modules - assert "| wayback" in caplog.text + assert "| wayback" in out + + # output dir and scan name + output_dir = bbot_test_dir / "bbot_cli_args_output" + scan_name = "bbot_cli_args_scan_name" + scan_dir = output_dir / scan_name + assert not output_dir.exists() + monkeypatch.setattr("sys.argv", ["bbot", "-o", str(output_dir), "-n", scan_name, "-y"]) + result = await cli._main() + assert result == True + assert output_dir.is_dir() + assert scan_dir.is_dir() + assert "[SCAN]" in open(scan_dir / "output.txt").read() + assert "[INFO]" in open(scan_dir / "scan.log").read() # list module options - caplog.clear() - assert not caplog.text monkeypatch.setattr("sys.argv", ["bbot", "--list-module-options"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert "| modules.wayback.urls" in caplog.text - assert "| bool" in caplog.text - assert "| emit URLs in addition to DNS_NAMEs" in caplog.text - assert "| False" in caplog.text - assert "| modules.dnsbrute.wordlist" in caplog.text - assert "| modules.robots.include_allow" in caplog.text + assert "| modules.wayback.urls" in out + assert "| bool" in out + assert "| emit URLs in addition to DNS_NAMEs" in out + assert "| False" in out + assert "| modules.dnsbrute.wordlist" in out + assert "| modules.robots.include_allow" in out # list module options by flag - caplog.clear() - assert not caplog.text monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "--list-module-options"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert "| modules.wayback.urls" in caplog.text - assert "| bool" in caplog.text - assert "| emit URLs in addition to DNS_NAMEs" in caplog.text - assert "| False" in caplog.text - assert "| modules.dnsbrute.wordlist" in caplog.text - assert not "| modules.robots.include_allow" in caplog.text + assert "| modules.wayback.urls" in out + assert "| bool" in out + assert "| emit URLs in addition to DNS_NAMEs" in out + assert "| False" in out + assert "| modules.dnsbrute.wordlist" in out + assert not "| modules.robots.include_allow" in out # list module options by module - caplog.clear() - assert not caplog.text monkeypatch.setattr("sys.argv", ["bbot", "-m", "dnsbrute", "-lmo"]) result = await cli._main() + out, err = capsys.readouterr() + assert result == None + assert out.count("modules.") == out.count("modules.dnsbrute.") + assert not "| modules.wayback.urls" in out + assert "| modules.dnsbrute.wordlist" in out + assert not "| modules.robots.include_allow" in out + + # list output module options by module + monkeypatch.setattr("sys.argv", ["bbot", "-om", "stdout", "-lmo"]) + result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert not "| modules.wayback.urls" in caplog.text - assert "| modules.dnsbrute.wordlist" in caplog.text - assert not "| modules.robots.include_allow" in caplog.text + assert out.count("modules.") == out.count("modules.stdout.") # list flags - caplog.clear() - assert not caplog.text monkeypatch.setattr("sys.argv", ["bbot", "--list-flags"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert "| safe" in caplog.text - assert "| Non-intrusive, safe to run" in caplog.text - assert "| active" in caplog.text - assert "| passive" in caplog.text + assert "| safe" in out + assert "| Non-intrusive, safe to run" in out + assert "| active" in out + assert "| passive" in out # list only a single flag - caplog.clear() - assert not caplog.text monkeypatch.setattr("sys.argv", ["bbot", "-f", "active", "--list-flags"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert not "| safe" in caplog.text - assert "| active" in caplog.text - assert not "| passive" in caplog.text + assert not "| safe" in out + assert "| active" in out + assert not "| passive" in out # list multiple flags - caplog.clear() - assert not caplog.text monkeypatch.setattr("sys.argv", ["bbot", "-f", "active", "safe", "--list-flags"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert "| safe" in caplog.text - assert "| active" in caplog.text - assert not "| passive" in caplog.text - - # custom target type - caplog.clear() - assert not caplog.text - monkeypatch.setattr("sys.argv", ["bbot", "-t", "ORG:evilcorp"]) - result = await cli._main() - assert result == True - assert "[ORG_STUB] evilcorp TARGET" in caplog.text - - # activate modules by flag - caplog.clear() - assert not caplog.text - monkeypatch.setattr("sys.argv", ["bbot", "-f", "passive"]) - result = await cli._main() - assert result == True + assert "| safe" in out + assert "| active" in out + assert not "| passive" in out # no args - caplog.clear() - assert not caplog.text monkeypatch.setattr("sys.argv", ["bbot"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert "Target:\n -t TARGET [TARGET ...]" in caplog.text + assert "Target:\n -t TARGET [TARGET ...]" in out # list modules - caplog.clear() - assert not caplog.text monkeypatch.setattr("sys.argv", ["bbot", "-l"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert "| dnsbrute" in caplog.text - assert "| httpx" in caplog.text - assert "| robots" in caplog.text + assert "| dnsbrute " in out + assert "| httpx" in out + assert "| robots" in out # list modules by flag - caplog.clear() - assert not caplog.text monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-l"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert "| dnsbrute" in caplog.text - assert "| httpx" in caplog.text - assert not "| robots" in caplog.text + assert "| dnsbrute " in out + assert "| httpx" in out + assert not "| robots" in out # list modules by flag + required flag - caplog.clear() monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-rf", "passive", "-l"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert "| dnsbrute" in caplog.text - assert not "| httpx" in caplog.text + assert "| dnsbrute " in out + assert not "| httpx" in out # list modules by flag + excluded flag - caplog.clear() - assert not caplog.text monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-ef", "active", "-l"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert "| dnsbrute" in caplog.text - assert not "| httpx" in caplog.text + assert "| dnsbrute " in out + assert not "| httpx" in out # list modules by flag + excluded module - caplog.clear() - assert not caplog.text - monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-em", "dnsbrute", "dnsbrute_mutations", "-l"]) + monkeypatch.setattr("sys.argv", ["bbot", "-f", "subdomain-enum", "-em", "dnsbrute", "-l"]) result = await cli._main() + out, err = capsys.readouterr() assert result == None - assert not "| dnsbrute" in caplog.text - assert "| httpx" in caplog.text + assert not "| dnsbrute " in out + assert "| httpx" in out # output modules override caplog.clear() From 07a061c4fb8f1015af0601f337f6e4d340e39a29 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sat, 4 May 2024 20:30:06 -0400 Subject: [PATCH 23/24] restore --install-all-deps test --- bbot/test/test_step_1/test_cli.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index b658f36bc1..0d36408c03 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -309,9 +309,9 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config): assert result == True, "-m nuclei failed to run with --allow-deadly" # install all deps - # monkeypatch.setattr("sys.argv", ["bbot", "--install-all-deps"]) - # success = await cli._main() - # assert success, "--install-all-deps failed for at least one module" + monkeypatch.setattr("sys.argv", ["bbot", "--install-all-deps"]) + success = await cli._main() + assert success == True, "--install-all-deps failed for at least one module" def test_cli_config_validation(monkeypatch, caplog): From 31fb52bb45ba43fde955d323f7cb18010e3a30ce Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Sat, 4 May 2024 21:45:27 -0400 Subject: [PATCH 24/24] fix dnsbrute tests --- bbot/test/test_step_2/module_tests/test_module_dnsbrute.py | 5 ++++- .../module_tests/test_module_dnsbrute_mutations.py | 1 + .../test_step_2/module_tests/test_module_dnscommonsrv.py | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py b/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py index bdbd2f6cb5..d1c5e5cc9c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py +++ b/bbot/test/test_step_2/module_tests/test_module_dnsbrute.py @@ -12,7 +12,9 @@ async def setup_after_prep(self, module_test): async def new_run_live(*command, check=False, text=True, **kwargs): if "massdns" in command[:2]: - yield """{"name": "asdf.blacklanternsecurity.com.", "type": "A", "class": "IN", "status": "NOERROR", "rx_ts": 1713974911725326170, "data": {"answers": [{"ttl": 86400, "type": "A", "class": "IN", "name": "asdf.blacklanternsecurity.com.", "data": "1.2.3.4."}]}, "flags": ["rd", "ra"], "resolver": "195.226.187.130:53", "proto": "UDP"}""" + _input = [l async for l in kwargs["input"]] + if "asdf.blacklanternsecurity.com" in _input: + yield """{"name": "asdf.blacklanternsecurity.com.", "type": "A", "class": "IN", "status": "NOERROR", "rx_ts": 1713974911725326170, "data": {"answers": [{"ttl": 86400, "type": "A", "class": "IN", "name": "asdf.blacklanternsecurity.com.", "data": "1.2.3.4."}]}, "flags": ["rd", "ra"], "resolver": "195.226.187.130:53", "proto": "UDP"}""" else: async for _ in old_run_live(*command, check=False, text=True, **kwargs): yield _ @@ -21,6 +23,7 @@ async def new_run_live(*command, check=False, text=True, **kwargs): await module_test.mock_dns( { + "blacklanternsecurity.com": {"A": ["4.3.2.1"]}, "asdf.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, } ) diff --git a/bbot/test/test_step_2/module_tests/test_module_dnsbrute_mutations.py b/bbot/test/test_step_2/module_tests/test_module_dnsbrute_mutations.py index 2a56b2b65e..0a7627f25b 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dnsbrute_mutations.py +++ b/bbot/test/test_step_2/module_tests/test_module_dnsbrute_mutations.py @@ -31,6 +31,7 @@ async def new_run_live(*command, check=False, text=True, **kwargs): await module_test.mock_dns( { + "blacklanternsecurity.com": {"A": ["1.2.3.4"]}, # targets "rrrr.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, "asdff-ffdsa.blacklanternsecurity.com": {"A": ["1.2.3.4"]}, diff --git a/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py b/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py index 3d3d670e1e..8d54f4e3ad 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py +++ b/bbot/test/test_step_2/module_tests/test_module_dnscommonsrv.py @@ -10,7 +10,9 @@ async def setup_after_prep(self, module_test): async def new_run_live(*command, check=False, text=True, **kwargs): if "massdns" in command[:2]: - yield """{"name":"_ldap._tcp.gc._msdcs.blacklanternsecurity.com.","type":"SRV","class":"IN","status":"NOERROR","rx_ts":1713974911725326170,"data":{"answers":[{"ttl":86400,"type":"SRV","class":"IN","name":"_ldap._tcp.gc._msdcs.blacklanternsecurity.com.","data":"10 10 1720 asdf.blacklanternsecurity.com."},{"ttl":86400,"type":"SRV","class":"IN","name":"_ldap._tcp.gc._msdcs.blacklanternsecurity.com.","data":"10 10 1720 asdf.blacklanternsecurity.com."}]},"flags":["rd","ra"],"resolver":"195.226.187.130:53","proto":"UDP"}""" + _input = [l async for l in kwargs["input"]] + if "_ldap._tcp.gc._msdcs.blacklanternsecurity.com" in _input: + yield """{"name":"_ldap._tcp.gc._msdcs.blacklanternsecurity.com.","type":"SRV","class":"IN","status":"NOERROR","rx_ts":1713974911725326170,"data":{"answers":[{"ttl":86400,"type":"SRV","class":"IN","name":"_ldap._tcp.gc._msdcs.blacklanternsecurity.com.","data":"10 10 1720 asdf.blacklanternsecurity.com."},{"ttl":86400,"type":"SRV","class":"IN","name":"_ldap._tcp.gc._msdcs.blacklanternsecurity.com.","data":"10 10 1720 asdf.blacklanternsecurity.com."}]},"flags":["rd","ra"],"resolver":"195.226.187.130:53","proto":"UDP"}""" else: async for _ in old_run_live(*command, check=False, text=True, **kwargs): yield _