From 92b7a2203062ab299ab3ea72a31d447625e870b2 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 2 Aug 2024 14:30:30 -0400 Subject: [PATCH] dns optimizations WIP --- bbot/core/helpers/dns/dns.py | 43 +------ bbot/core/helpers/dns/engine.py | 172 +++++++++++++++++++--------- bbot/modules/internal/dnsresolve.py | 128 ++++++++++----------- 3 files changed, 189 insertions(+), 154 deletions(-) diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py index 7e347ed69d..ff188862d0 100644 --- a/bbot/core/helpers/dns/dns.py +++ b/bbot/core/helpers/dns/dns.py @@ -111,40 +111,9 @@ def brute(self): self._brute = DNSBrute(self.parent_helper) return self._brute - async def is_wildcard(self, query, ips=None, rdtype=None): - """ - Use this method to check whether a *host* is a wildcard entry - - This can reliably tell the difference between a valid DNS record and a wildcard within a wildcard domain. - - If you want to know whether a domain is using wildcard DNS, use `is_wildcard_domain()` instead. - - Args: - query (str): The hostname to check for a wildcard entry. - ips (list, optional): List of IPs to compare against, typically obtained from a previous DNS resolution of the query. - rdtype (str, optional): The DNS record type (e.g., "A", "AAAA") to consider during the check. - - Returns: - dict: A dictionary indicating if the query is a wildcard for each checked DNS record type. - Keys are DNS record types like "A", "AAAA", etc. - Values are tuples where the first element is a boolean indicating if the query is a wildcard, - and the second element is the wildcard parent if it's a wildcard. - - Raises: - ValueError: If only one of `ips` or `rdtype` is specified or if no valid IPs are specified. - - Examples: - >>> is_wildcard("www.github.io") - {"A": (True, "github.io"), "AAAA": (True, "github.io")} - - >>> is_wildcard("www.evilcorp.com", ips=["93.184.216.34"], rdtype="A") - {"A": (False, "evilcorp.com")} - - Note: - `is_wildcard` can be True, False, or None (indicating that wildcard detection was inconclusive) - """ - if [ips, rdtype].count(None) == 1: - raise ValueError("Both ips and rdtype must be specified") + async def is_wildcard(self, query, dns_children=None, rdtype=None): + if [dns_children, rdtype].count(None) == 1: + raise ValueError("Both dns_children and rdtype must be specified") query = self._wildcard_prevalidation(query) if not query: @@ -154,14 +123,14 @@ async def is_wildcard(self, query, ips=None, rdtype=None): if is_domain(query): return {} - return await self.run_and_return("is_wildcard", query=query, ips=ips, rdtype=rdtype) + return await self.run_and_return("is_wildcard", query=query, dns_children=dns_children, rdtype=rdtype) - async def is_wildcard_domain(self, domain, log_info=False): + async def is_wildcard_domain(self, domain, dns_children=None, log_info=False): domain = self._wildcard_prevalidation(domain) if not domain: return {} - return await self.run_and_return("is_wildcard_domain", domain=domain, log_info=False) + return await self.run_and_return("is_wildcard_domain", domain=domain, dns_children=dns_children, log_info=False) def _wildcard_prevalidation(self, host): if self.wildcard_disable: diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 91efca10dd..1a63af171c 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -75,7 +75,7 @@ def __init__(self, socket_path, config={}): self.wildcard_ignore = [] self.wildcard_ignore = tuple([str(d).strip().lower() for d in self.wildcard_ignore]) self.wildcard_tests = self.dns_config.get("wildcard_tests", 5) - self._wildcard_cache = dict() + self._wildcard_cache = LRUCache(maxsize=50000) # since wildcard detection takes some time, This is to prevent multiple # modules from kicking off wildcard detection for the same domain at the same time self._wildcard_lock = NamedLock() @@ -193,6 +193,7 @@ async def _resolve_hostname(self, query, **kwargs): >>> results, errors = await _resolve_hostname("google.com") (, []) """ + log.critical(query) self.debug(f"Resolving {query} with kwargs={kwargs}") results = [] errors = [] @@ -432,7 +433,7 @@ async def _catch(self, callback, *args, **kwargs): log.trace(traceback.format_exc()) return [] - async def is_wildcard(self, query, ips=None, rdtype=None): + async def is_wildcard(self, query, dns_children=None, rdtype=None): """ Use this method to check whether a *host* is a wildcard entry @@ -442,7 +443,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None): Args: query (str): The hostname to check for a wildcard entry. - ips (list, optional): List of IPs to compare against, typically obtained from a previous DNS resolution of the query. + dns_children (dict, optional): Dictionary of {RDTYPE: [ip1, ip2]} to compare against, typically obtained from a previous DNS resolution of the query. rdtype (str, optional): The DNS record type (e.g., "A", "AAAA") to consider during the check. Returns: @@ -452,18 +453,19 @@ async def is_wildcard(self, query, ips=None, rdtype=None): and the second element is the wildcard parent if it's a wildcard. Raises: - ValueError: If only one of `ips` or `rdtype` is specified or if no valid IPs are specified. + ValueError: If only one of `dns_children` or `rdtype` is specified or if no valid IPs are specified. Examples: >>> is_wildcard("www.github.io") {"A": (True, "github.io"), "AAAA": (True, "github.io")} - >>> is_wildcard("www.evilcorp.com", ips=["93.184.216.34"], rdtype="A") + >>> is_wildcard("www.evilcorp.com", dns_children=["93.184.216.34"], rdtype="A") {"A": (False, "evilcorp.com")} Note: `is_wildcard` can be True, False, or None (indicating that wildcard detection was inconclusive) """ + log.critical(query) result = {} parent = parent_domain(query) @@ -478,7 +480,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None): query_baseline = dict() # if the caller hasn't already done the work of resolving the IPs - if ips is None: + if dns_children is None: # then resolve the query for all rdtypes queries = [(query, t) for t in rdtypes_to_check] async for (query, _rdtype), (answers, errors) in self.resolve_raw_batch(queries): @@ -491,11 +493,12 @@ async def is_wildcard(self, query, ips=None, rdtype=None): result[_rdtype] = (None, parent) continue else: - # otherwise, we can skip all that - cleaned_ips = set([clean_dns_record(ip) for ip in ips]) - if not cleaned_ips: - raise ValueError("Valid IPs must be specified") - query_baseline[rdtype] = cleaned_ips + for _rdtype, ips in dns_children.items(): + # otherwise, we can skip all that + cleaned_ips = set([clean_dns_record(ip) for ip in ips]) + if not cleaned_ips: + raise ValueError("Valid IPs must be specified") + query_baseline[_rdtype] = cleaned_ips if not query_baseline: return result @@ -506,7 +509,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None): try: for host in parents[::-1]: # make sure we've checked that domain for wildcards - await self.is_wildcard_domain(host) + await self.is_wildcard_domain(host, ) # for every rdtype for _rdtype in list(query_baseline): @@ -545,7 +548,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None): return result - async def is_wildcard_domain(self, domain, log_info=False): + async def is_wildcard_domain(self, domain, log_info=False, rdtype=None): """ Check whether a given host or its children make use of wildcard DNS entries. Wildcard DNS can have various implications, particularly in subdomain enumeration and subdomain takeovers. @@ -566,59 +569,124 @@ async def is_wildcard_domain(self, domain, log_info=False): >>> is_wildcard_domain("example.com") {} """ + # TODO: combine wildcard_domain_results and wildcard_rdtypes wildcard_domain_results = {} + log.critical(domain) - rdtypes_to_check = set(all_rdtypes) + if rdtype is not None: + if isinstance(rdtype, str): + rdtype = [rdtype] + rdtypes_to_check = rdtype + else: + rdtypes_to_check = set(all_rdtypes) # make a list of its parents parents = list(domain_parents(domain, include_self=True)) # and check each of them, beginning with the highest parent (i.e. the root domain) + wildcard_rdtypes = [] for i, host in enumerate(parents[::-1]): - # have we checked this host before? - host_hash = hash(host) - async with self._wildcard_lock.lock(host_hash): - # if we've seen this host before - if host_hash in self._wildcard_cache: - wildcard_domain_results[host] = self._wildcard_cache[host_hash] + + for _rdtype in rdtypes_to_check: + # don't check this rdtype if it's a known wildcard at a higher level + if _rdtype in wildcard_rdtypes: continue - log.verbose(f"Checking if {host} is a wildcard") + hash_key = hash(f"{host}:{_rdtype}") - # determine if this is a wildcard domain + # because we cache results, we only want one execution at a time for this host:rdtype + async with self._wildcard_lock.lock(hash_key): - # resolve a bunch of random subdomains of the same parent - is_wildcard = False - wildcard_results = dict() + # if we've seen this host before, load it from cache + if hash_key in self._wildcard_cache: + wildcard_hosts = self._wildcard_cache[hash_key] + # if this rdtype is a wildcard, we can skip any further checks on it + if wildcard_hosts: + wildcard_rdtypes.append(_rdtype) + wildcard_domain_results[host] = wildcard_hosts + log.hugesuccess(f"{host}:{_rdtype} was in cache: {wildcard_hosts}") + continue + + log.hugewarning(f"Checking if {host}:{_rdtype} is a wildcard") - rand_queries = [] - for rdtype in rdtypes_to_check: + # resolve a bunch of random subdomains of the same parent + wildcard_results = set() + rand_queries = [] for _ in range(self.wildcard_tests): rand_query = f"{rand_string(digits=False, length=10)}.{host}" - rand_queries.append((rand_query, rdtype)) - - async for (query, rdtype), (answers, errors) in self.resolve_raw_batch(rand_queries, use_cache=False): - answers = extract_targets(answers) - if answers: - is_wildcard = True - if not rdtype in wildcard_results: - wildcard_results[rdtype] = set() - wildcard_results[rdtype].update(set(a[1] for a in answers)) - # we know this rdtype is a wildcard - # so we don't need to check it anymore - with suppress(KeyError): - rdtypes_to_check.remove(rdtype) - - self._wildcard_cache.update({host_hash: wildcard_results}) - wildcard_domain_results.update({host: wildcard_results}) - if is_wildcard: - wildcard_rdtypes_str = ",".join(sorted([t.upper() for t, r in wildcard_results.items() if r])) - log_fn = log.verbose - if log_info: - log_fn = log.info - log_fn(f"Encountered domain with wildcard DNS ({wildcard_rdtypes_str}): {host}") - else: - log.verbose(f"Finished checking {host}, it is not a wildcard") - + rand_queries.append(rand_query) + async for (query, results) in self.resolve_batch(rand_queries, type=_rdtype, use_cache=False): + wildcard_results.update(results) + if results: + # we know this rdtype is a wildcard + # so we don't need to check it anymore + wildcard_rdtypes.append(_rdtype) + + self._wildcard_cache[hash_key] = wildcard_results + try: + wildcard_domain_result = wildcard_domain_results[host] + except KeyError: + wildcard_domain_result = {} + wildcard_domain_results[host] = wildcard_domain_result + wildcard_domain_result[_rdtype] = wildcard_results + + if wildcard_domain_result: + wildcard_rdtypes_str = ",".join(sorted([t.upper() for t, r in wildcard_domain_result.items() if r])) + # log_fn = log.verbose + # if log_info: + # log_fn = log.info + log_fn = log.critical + log_fn(f"Encountered domain with wildcard DNS ({wildcard_rdtypes_str}): {host}") + else: + log.hugesuccess(f"Finished checking {host}, it is not a wildcard") + + # async with self._wildcard_lock.lock(host_hash): + # # if we've seen this host before + # if host_hash in self._wildcard_cache: + # wildcard_domain_results[host] = self._wildcard_cache[host_hash] + # continue + + # log.verbose(f"Checking if {host} is a wildcard") + + # # determine if this is a wildcard domain + + # # resolve a bunch of random subdomains of the same parent + # is_wildcard = False + # wildcard_results = dict() + + # rand_queries = [] + # for rdtype in rdtypes_to_check: + # host_hash = hash(f"{host}:{rdtype}") + # if host_hash in self._wildcard_cache: + # wildcard_results[rdtype].update(self._wildcard_cache[host_hash]) + # continue + # for _ in range(self.wildcard_tests): + # rand_query = f"{rand_string(digits=False, length=10)}.{host}" + # rand_queries.append((rand_query, rdtype)) + + # async for (query, rdtype), (answers, errors) in self.resolve_raw_batch(rand_queries, use_cache=False): + # answers = extract_targets(answers) + # if answers: + # is_wildcard = True + # if not rdtype in wildcard_results: + # wildcard_results[rdtype] = set() + # wildcard_results[rdtype].update(set(a[1] for a in answers)) + # # we know this rdtype is a wildcard + # # so we don't need to check it anymore + # with suppress(KeyError): + # rdtypes_to_check.remove(rdtype) + + # self._wildcard_cache.update({host_hash: wildcard_results}) + # wildcard_domain_results.update({host: wildcard_results}) + # if is_wildcard: + # wildcard_rdtypes_str = ",".join(sorted([t.upper() for t, r in wildcard_results.items() if r])) + # log_fn = log.verbose + # if log_info: + # log_fn = log.info + # log_fn(f"Encountered domain with wildcard DNS ({wildcard_rdtypes_str}): {host}") + # else: + # log.verbose(f"Finished checking {host}, it is not a wildcard") + + log.hugeinfo(wildcard_domain_results) return wildcard_domain_results @property diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index fcf7e90afd..fdf9fefc6c 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -65,6 +65,7 @@ async def filter_event(self, event): async def handle_event(self, event, **kwargs): dns_tags = set() + raw_record_events = [] event_whitelisted = False event_blacklisted = False @@ -73,17 +74,8 @@ async def handle_event(self, event, **kwargs): event_host = str(event.host) event_host_hash = hash(event_host) - async with self._event_cache_locks.lock(event_host_hash): - # first thing we do is check for wildcards - if not event_is_ip: - if event.scope_distance <= self.scan.scope_search_distance: - await self.handle_wildcard_event(event) - - event_host = str(event.host) - event_host_hash = hash(event_host) - # we do DNS resolution inside a lock to make sure we don't duplicate work - # once the resolution happens, it will be cached so it doesn't need to happen again + # once the resolution happens, its results will be cached so it doesn't need to happen again async with self._event_cache_locks.lock(event_host_hash): try: # try to get from cache @@ -104,10 +96,9 @@ async def handle_event(self, event, **kwargs): else: rdtypes_to_resolve = all_rdtypes - # if missing from cache, do DNS resolution + # first, we do DNS resolution queries = [(event_host, rdtype) for rdtype in rdtypes_to_resolve] error_rdtypes = [] - raw_record_events = [] async for (query, rdtype), (answer, errors) in self.helpers.dns.resolve_raw_batch(queries): if self.emit_raw_records and rdtype not in ("A", "AAAA", "CNAME", "PTR"): raw_record_event = self.make_event( @@ -176,50 +167,6 @@ async def handle_event(self, event, **kwargs): for host in children: main_host_event._resolved_hosts.add(host) - # if we're not blacklisted, emit the main host event and all its raw records - if not event_blacklisted: - if event_whitelisted: - self.debug( - f"Making {main_host_event} in-scope because it resolves to an in-scope resource (A/AAAA)" - ) - main_host_event.scope_distance = 0 - await self.handle_wildcard_event(main_host_event) - - if event != main_host_event: - await self.emit_event(main_host_event) - for raw_record_event in raw_record_events: - await self.emit_event(raw_record_event) - - # kill runaway DNS chains - dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) - if dns_resolve_distance >= self.helpers.dns.runaway_limit: - self.debug( - f"Skipping DNS children for {event} because their DNS resolve distances would be greater than the configured value for this scan ({self.helpers.dns.runaway_limit})" - ) - main_host_event.dns_children = {} - - # emit DNS children - if not self.minimal: - in_dns_scope = -1 < event.scope_distance < self._dns_search_distance - for rdtype, records in main_host_event.dns_children.items(): - module = self.scan._make_dummy_module_dns(rdtype) - for record in records: - try: - child_event = self.scan.make_event( - record, "DNS_NAME", module=module, parent=main_host_event - ) - child_event.discovery_context = f"{rdtype} record for {event.host} contains {child_event.type}: {child_event.host}" - # if it's a hostname and it's only one hop away, mark it as affiliate - if child_event.type == "DNS_NAME" and child_event.scope_distance == 1: - child_event.add_tag("affiliate") - if in_dns_scope or self.preset.in_scope(child_event): - self.debug(f"Queueing DNS child for {event}: {child_event}") - await self.emit_event(child_event) - except ValidationError as e: - self.warning( - f'Event validation failed for DNS child of {main_host_event}: "{record}" ({rdtype}): {e}' - ) - # store results in cache self._event_cache[event_host_hash] = main_host_event, dns_tags, event_whitelisted, event_blacklisted @@ -229,9 +176,56 @@ async def handle_event(self, event, **kwargs): # if the event resolves to an in-scope IP, set its scope distance to 0 if event_whitelisted: - self.debug(f"Making {event} in-scope because it resolves to an in-scope resource") - event.scope_distance = 0 - await self.handle_wildcard_event(event) + self.debug( + f"Making {main_host_event} in-scope because it resolves to an in-scope resource (A/AAAA)" + ) + main_host_event.scope_distance = 0 + if event != main_host_event: + self.debug(f"Making {event} in-scope because it resolves to an in-scope resource (A/AAAA)") + event.scope_distance = 0 + + # if the event is within our scan's search distance, handle wildcard + if event.scope_distance <= self.scan.scope_search_distance: + rdtypes_to_check = list(main_host_event.dns_children) + self.hugeinfo(f"Checking {rdtypes_to_check}") + await self.handle_wildcard_event(main_host_event) + + # emit the main host and its raw records + if event != main_host_event: + await self.emit_event(main_host_event) + for raw_record_event in raw_record_events: + await self.emit_event(raw_record_event) + + # kill runaway DNS chains + dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) + if dns_resolve_distance >= self.helpers.dns.runaway_limit: + self.debug( + f"Skipping DNS children for {event} because their DNS resolve distances would be greater than the configured value for this scan ({self.helpers.dns.runaway_limit})" + ) + main_host_event.dns_children = {} + + # emit DNS children + if not self.minimal: + in_dns_scope = -1 < event.scope_distance < self._dns_search_distance + for rdtype, records in main_host_event.dns_children.items(): + module = self.scan._make_dummy_module_dns(rdtype) + for record in records: + try: + child_event = self.scan.make_event( + record, "DNS_NAME", module=module, parent=main_host_event + ) + child_event.discovery_context = f"{rdtype} record for {event.host} contains {child_event.type}: {child_event.host}" + # if it's a hostname and it's only one hop away, mark it as affiliate + if child_event.type == "DNS_NAME" and child_event.scope_distance == 1: + child_event.add_tag("affiliate") + if in_dns_scope or self.preset.in_scope(child_event): + self.debug(f"Queueing DNS child for {event}: {child_event}") + await self.emit_event(child_event) + except ValidationError as e: + self.warning( + f'Event validation failed for DNS child of {main_host_event}: "{record}" ({rdtype}): {e}' + ) + # transfer resolved hosts event._resolved_hosts = main_host_event._resolved_hosts @@ -242,20 +236,25 @@ async def handle_event(self, event, **kwargs): async def handle_wildcard_event(self, event): self.debug(f"Entering handle_wildcard_event({event})") + tags = set() + rdtypes_to_check = list(event.dns_children) + if not rdtypes_to_check: + return False, "", tags + self.hugeinfo(f'{event.host}: Checking rdtypes {rdtypes_to_check}') try: event_host = str(event.host) # check if the dns name itself is a wildcard entry - wildcard_rdtypes = await self.helpers.is_wildcard(event_host) + wildcard_rdtypes = await self.helpers.is_wildcard(event_host, dns_children=event.dns_children, rdtype=rdtypes_to_check) for rdtype, (is_wildcard, wildcard_host) in wildcard_rdtypes.items(): if is_wildcard == False: continue elif is_wildcard == True: - event.add_tag("wildcard") + tags.add("wildcard") wildcard_tag = "wildcard" elif is_wildcard == None: wildcard_tag = "error" - event.add_tag(f"{rdtype.lower()}-{wildcard_tag}") + tags.add(f"{rdtype.lower()}-{wildcard_tag}") # wildcard event modification (www.evilcorp.com --> _wildcard.evilcorp.com) if wildcard_rdtypes and not "target" in event.tags: @@ -275,11 +274,10 @@ async def handle_wildcard_event(self, event): break wildcard_data = f"_wildcard.{wildcard_parent}" if wildcard_data != event.data: - self.debug(f'Wildcard detected, changing event.data "{event.data}" --> "{wildcard_data}"') - event.data = wildcard_data - + return event_is_wildcard, wildcard_data, tags finally: self.debug(f"Finished handle_wildcard_event({event})") + return False, "", tags def get_dns_parent(self, event): """