diff --git a/bbot/core/engine.py b/bbot/core/engine.py index d8dd1af28..52d4b871a 100644 --- a/bbot/core/engine.py +++ b/bbot/core/engine.py @@ -63,13 +63,14 @@ def unpickle(self, binary): async def _infinite_retry(self, callback, *args, **kwargs): interval = kwargs.pop("_interval", 10) + context = kwargs.pop("_context", "") + if not context: + context = f"{callback.__name__}({args}, {kwargs})" while not self._shutdown_status: try: return await asyncio.wait_for(callback(*args, **kwargs), timeout=interval) except (TimeoutError, asyncio.TimeoutError): - self.log.debug( - f"{self.name}: Timeout waiting for response for {callback.__name__}({args}, {kwargs}), retrying..." - ) + self.log.debug(f"{self.name}: Timeout waiting for response for {context}, retrying...") class EngineClient(EngineBase): @@ -144,10 +145,10 @@ async def run_and_return(self, command, *args, **kwargs): if message is error_sentinel: return await self._infinite_retry(socket.send, message) - binary = await self._infinite_retry(socket.recv) + binary = await self._infinite_retry(socket.recv, _context=f"waiting for return value from {fn_str}") except BaseException: try: - await self.send_cancel_message(socket) + await self.send_cancel_message(socket, fn_str) except Exception: self.log.debug(f"{self.name}: {fn_str} failed to send cancel message after exception") self.log.trace(traceback.format_exc()) @@ -176,7 +177,9 @@ async def run_and_yield(self, command, *args, **kwargs): await socket.send(message) while 1: try: - binary = await self._infinite_retry(socket.recv) + binary = await self._infinite_retry( + socket.recv, _context=f"waiting for new iteration from {fn_str}" + ) # self.log.debug(f"{self.name}.{command}({kwargs}) got binary: {binary}") message = self.unpickle(binary) self.log.debug(f"{self.name} {command} got iteration: {message}") @@ -188,13 +191,13 @@ async def run_and_yield(self, command, *args, **kwargs): exc_name = e.__class__.__name__ self.log.debug(f"{self.name}.{command} got {exc_name}") try: - await self.send_cancel_message(socket) + await self.send_cancel_message(socket, fn_str) except Exception: self.log.debug(f"{self.name}.{command} failed to send cancel message after {exc_name}") self.log.trace(traceback.format_exc()) break - async def send_cancel_message(self, socket): + async def send_cancel_message(self, socket, context): """ Send a cancel message and wait for confirmation from the server """ @@ -202,7 +205,7 @@ async def send_cancel_message(self, socket): message = pickle.dumps({"c": -1}) await self._infinite_retry(socket.send, message) while 1: - response = await self._infinite_retry(socket.recv) + response = await self._infinite_retry(socket.recv, _context=f"waiting for CANCEL_OK from {context}") response = pickle.loads(response) if isinstance(response, dict): response = response.get("m", "") diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index cda4975f3..bcf41a37c 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -164,10 +164,12 @@ def __init__( self._priority = None self._parent_id = None self._host_original = None + self._scope_distance = None self._module_priority = None self._resolved_hosts = set() self.dns_children = dict() self._discovery_context = "" + self.web_spider_distance = 0 # for creating one-off events without enforcing parent requirement self._dummy = _dummy @@ -199,8 +201,6 @@ def __init__( if self.scan: self.scans = list(set([self.scan.id] + self.scans)) - self._scope_distance = -1 - try: self.data = self._sanitize_data(data) except Exception as e: @@ -214,9 +214,6 @@ def __init__( if (not self.parent) and (not self._dummy): raise ValidationError(f"Must specify event parent") - # inherit web spider distance from parent - self.web_spider_distance = getattr(self.parent, "web_spider_distance", 0) - if tags is not None: for tag in tags: self.add_tag(tag) @@ -435,29 +432,29 @@ def scope_distance(self, scope_distance): Note: The method will automatically update the relevant 'distance-' tags associated with the event. """ - if scope_distance >= 0: - new_scope_distance = None - # ensure scope distance does not increase (only allow setting to smaller values) - if self.scope_distance == -1: - new_scope_distance = scope_distance + if scope_distance < 0: + raise ValueError(f"Invalid scope distance: {scope_distance}") + # ensure scope distance does not increase (only allow setting to smaller values) + if self.scope_distance is None: + new_scope_distance = scope_distance + else: + new_scope_distance = min(self.scope_distance, scope_distance) + if self._scope_distance != new_scope_distance: + # remove old scope distance tags + for t in list(self.tags): + if t.startswith("distance-"): + self.remove_tag(t) + if scope_distance == 0: + self.add_tag("in-scope") + self.remove_tag("affiliate") else: - new_scope_distance = min(self.scope_distance, scope_distance) - if self._scope_distance != new_scope_distance: - # remove old scope distance tags - for t in list(self.tags): - if t.startswith("distance-"): - self.remove_tag(t) - if scope_distance == 0: - self.add_tag("in-scope") - self.remove_tag("affiliate") - else: - self.remove_tag("in-scope") - self.add_tag(f"distance-{new_scope_distance}") - self._scope_distance = new_scope_distance - # apply recursively to parent events - parent_scope_distance = getattr(self.parent, "scope_distance", -1) - if parent_scope_distance >= 0 and self != self.parent: - self.parent.scope_distance = scope_distance + 1 + self.remove_tag("in-scope") + self.add_tag(f"distance-{new_scope_distance}") + self._scope_distance = new_scope_distance + # apply recursively to parent events + parent_scope_distance = getattr(self.parent, "scope_distance", None) + if parent_scope_distance is not None and self != self.parent: + self.parent.scope_distance = scope_distance + 1 @property def scope_description(self): @@ -493,20 +490,27 @@ def parent(self, parent): """ if is_event(parent): self._parent = parent - hosts_are_same = self.host and (self.host == parent.host) - if parent.scope_distance >= 0: - new_scope_distance = int(parent.scope_distance) + hosts_are_same = (self.host and parent.host) and (self.host == parent.host) + new_scope_distance = int(parent.scope_distance) + if self.host and parent.scope_distance is not None: # only increment the scope distance if the host changes if self._scope_distance_increment_same_host or not hosts_are_same: new_scope_distance += 1 - self.scope_distance = new_scope_distance + self.scope_distance = new_scope_distance # inherit certain tags if hosts_are_same: + # inherit web spider distance from parent + self.web_spider_distance = getattr(parent, "web_spider_distance", 0) + event_has_url = getattr(self, "parsed_url", None) is not None for t in parent.tags: - if t == "affiliate": - self.add_tag("affiliate") + if t in ("affiliate",): + self.add_tag(t) elif t.startswith("mutation-"): self.add_tag(t) + # only add these tags if the event has a URL + if event_has_url: + if t in ("spider-danger", "spider-max"): + self.add_tag(t) elif not self._dummy: log.warning(f"Tried to set invalid parent on {self}: (got: {parent})") @@ -539,9 +543,11 @@ def get_parent(self): return self.parent.get_parent() return self.parent - def get_parents(self, omit=False): + def get_parents(self, omit=False, include_self=False): parents = [] e = self + if include_self: + parents.append(self) while 1: if omit: parent = e.get_parent() @@ -1098,12 +1104,13 @@ def sanitize_data(self, data): return data def add_tag(self, tag): - if tag == "spider-danger": + host_same_as_parent = self.parent and self.host == self.parent.host + if tag == "spider-danger" and host_same_as_parent and not "spider-danger" in self.tags: # increment the web spider distance if self.type == "URL_UNVERIFIED": self.web_spider_distance += 1 - if self.is_spider_max: - self.add_tag("spider-max") + if self.is_spider_max: + self.add_tag("spider-max") super().add_tag(tag) @property diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py index 883553607..91efca10d 100644 --- a/bbot/core/helpers/dns/engine.py +++ b/bbot/core/helpers/dns/engine.py @@ -373,12 +373,12 @@ def new_task(query): if queries: # Start a new task for each one completed, if URLs remain new_task(queries.pop(0)) - async def resolve_raw_batch(self, queries, threads=10): + async def resolve_raw_batch(self, queries, threads=10, **kwargs): tasks = {} client_id = self.client_id_var.get() def new_task(query, rdtype): - task = self.new_child_task(client_id, self.resolve_raw(query, type=rdtype)) + task = self.new_child_task(client_id, self.resolve_raw(query, type=rdtype, **kwargs)) tasks[task] = (query, rdtype) queries = list(queries) @@ -469,7 +469,12 @@ async def is_wildcard(self, query, ips=None, rdtype=None): parent = parent_domain(query) parents = list(domain_parents(query)) - rdtypes_to_check = [rdtype] if rdtype is not None else all_rdtypes + if rdtype is not None: + if isinstance(rdtype, str): + rdtype = [rdtype] + rdtypes_to_check = rdtype + else: + rdtypes_to_check = all_rdtypes query_baseline = dict() # if the caller hasn't already done the work of resolving the IPs @@ -534,6 +539,10 @@ async def is_wildcard(self, query, ips=None, rdtype=None): except DNSWildcardBreak: pass + for _rdtype, answers in query_baseline.items(): + if answers and _rdtype not in result: + result[_rdtype] = (False, query) + return result async def is_wildcard_domain(self, domain, log_info=False): @@ -581,13 +590,13 @@ async def is_wildcard_domain(self, domain, log_info=False): is_wildcard = False wildcard_results = dict() - queries = [] + rand_queries = [] for rdtype in rdtypes_to_check: for _ in range(self.wildcard_tests): rand_query = f"{rand_string(digits=False, length=10)}.{host}" - queries.append((rand_query, rdtype)) + rand_queries.append((rand_query, rdtype)) - async for (query, rdtype), (answers, errors) in self.resolve_raw_batch(queries): + async for (query, rdtype), (answers, errors) in self.resolve_raw_batch(rand_queries, use_cache=False): answers = extract_targets(answers) if answers: is_wildcard = True diff --git a/bbot/modules/base.py b/bbot/modules/base.py index 9b43b1d2f..42426c2ac 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -1211,7 +1211,7 @@ def log_table(self, *args, **kwargs): return table def _is_graph_important(self, event): - return self.preserve_graph and getattr(event, "_graph_important", False) + return self.preserve_graph and getattr(event, "_graph_important", False) and not getattr(event, "_omit", False) @property def preserve_graph(self): @@ -1380,7 +1380,7 @@ def error(self, *args, trace=True, **kwargs): if trace: self.trace() - def trace(self): + def trace(self, msg=None): """Logs the stack trace of the most recently caught exception. This method captures the type, value, and traceback of the most recent exception and logs it using the trace level. It is typically used for debugging purposes. @@ -1393,9 +1393,12 @@ def trace(self): >>> except ZeroDivisionError: >>> self.trace() """ - e_type, e_val, e_traceback = exc_info() - if e_type is not None: - self.log.trace(traceback.format_exc()) + if msg is None: + e_type, e_val, e_traceback = exc_info() + if e_type is not None: + self.log.trace(traceback.format_exc()) + else: + self.log.trace(msg) def critical(self, *args, trace=True, **kwargs): """Logs a whole message in emboldened red text, and optionally the stack trace of the most recent exception. @@ -1454,8 +1457,6 @@ async def _worker(self): await self.finish() continue - self.debug(f"Got {event} from {getattr(event, 'module', 'unknown_module')}") - acceptable = True async with self._task_counter.count(f"event_precheck({event})"): precheck_pass, reason = self._event_precheck(event) @@ -1482,12 +1483,11 @@ async def _worker(self): with suppress(ValueError, TypeError): forward_event, forward_event_reason = forward_event - self.debug(f"Finished intercepting {event}") - if forward_event is False: self.debug(f"Not forwarding {event} because {forward_event_reason}") continue + self.debug(f"Forwarding {event}") await self.forward_event(event, kwargs) except asyncio.CancelledError: diff --git a/bbot/modules/code_repository.py b/bbot/modules/code_repository.py index ef76954a9..372c73b08 100644 --- a/bbot/modules/code_repository.py +++ b/bbot/modules/code_repository.py @@ -49,7 +49,6 @@ async def handle_event(self, event): tags=platform, parent=event, ) - repo_event.scope_distance = event.scope_distance await self.emit_event( repo_event, context=f"{{module}} detected {platform} {{event.type}} at {url}", diff --git a/bbot/modules/docker_pull.py b/bbot/modules/docker_pull.py index 85f31aae8..987651fcd 100644 --- a/bbot/modules/docker_pull.py +++ b/bbot/modules/docker_pull.py @@ -60,7 +60,6 @@ async def handle_event(self, event): parent=event, ) if codebase_event: - codebase_event.scope_distance = event.scope_distance await self.emit_event( codebase_event, context=f"{{module}} downloaded Docker image to {{event.type}}: {repo_path}" ) diff --git a/bbot/modules/filedownload.py b/bbot/modules/filedownload.py index 37092be06..7649be4d3 100644 --- a/bbot/modules/filedownload.py +++ b/bbot/modules/filedownload.py @@ -144,7 +144,6 @@ async def download_file(self, url, content_type=None, source_event=None): file_event = self.make_event( {"path": str(file_destination)}, "FILESYSTEM", tags=["filedownload", "file"], parent=source_event ) - file_event.scope_distance = source_event.scope_distance await self.emit_event(file_event) self.urls_downloaded.add(hash(url)) diff --git a/bbot/modules/git_clone.py b/bbot/modules/git_clone.py index 3961ea920..6cda79f9d 100644 --- a/bbot/modules/git_clone.py +++ b/bbot/modules/git_clone.py @@ -40,7 +40,6 @@ async def handle_event(self, event): if repo_path: self.verbose(f"Cloned {repo_url} to {repo_path}") codebase_event = self.make_event({"path": str(repo_path)}, "FILESYSTEM", tags=["git"], parent=event) - codebase_event.scope_distance = event.scope_distance await self.emit_event( codebase_event, context=f"{{module}} downloaded git repo at {repo_url} to {{event.type}}: {repo_path}", diff --git a/bbot/modules/github_codesearch.py b/bbot/modules/github_codesearch.py index ddafb025f..cca7e3cff 100644 --- a/bbot/modules/github_codesearch.py +++ b/bbot/modules/github_codesearch.py @@ -35,7 +35,6 @@ async def handle_event(self, event): url_event = self.make_event(raw_url, "URL_UNVERIFIED", parent=repo_event, tags=["httpx-safe"]) if not url_event: continue - url_event.scope_distance = repo_event.scope_distance await self.emit_event( url_event, context=f'file matching query "{query}" is at {{event.type}}: {raw_url}' ) diff --git a/bbot/modules/github_org.py b/bbot/modules/github_org.py index 1d115b925..90fba82b8 100644 --- a/bbot/modules/github_org.py +++ b/bbot/modules/github_org.py @@ -62,7 +62,6 @@ async def handle_event(self, event): repo_event = self.make_event({"url": repo_url}, "CODE_REPOSITORY", tags="git", parent=event) if not repo_event: continue - repo_event.scope_distance = event.scope_distance await self.emit_event( repo_event, context=f"{{module}} listed repos for GitHub profile and discovered {{event.type}}: {repo_url}", @@ -97,7 +96,6 @@ async def handle_event(self, event): event_data = {"platform": "github", "profile_name": user, "url": user_url} github_org_event = self.make_event(event_data, "SOCIAL", tags="github-org", parent=event) if github_org_event: - github_org_event.scope_distance = event.scope_distance await self.emit_event( github_org_event, context=f'{{module}} tried "{user}" as GitHub profile and discovered {{event.type}}: {user_url}', diff --git a/bbot/modules/github_workflows.py b/bbot/modules/github_workflows.py index 76ed2d5ff..15767ca63 100644 --- a/bbot/modules/github_workflows.py +++ b/bbot/modules/github_workflows.py @@ -58,7 +58,6 @@ async def handle_event(self, event): tags=["textfile"], parent=event, ) - logfile_event.scope_distance = event.scope_distance await self.emit_event( logfile_event, context=f"{{module}} downloaded workflow run logs from {workflow_url} to {{event.type}}: {log}", diff --git a/bbot/modules/gitlab.py b/bbot/modules/gitlab.py index 3404f3ba3..dcdc841b5 100644 --- a/bbot/modules/gitlab.py +++ b/bbot/modules/gitlab.py @@ -99,7 +99,6 @@ async def handle_projects_url(self, projects_url, event): project_url = project.get("web_url", "") if project_url: code_event = self.make_event({"url": project_url}, "CODE_REPOSITORY", tags="git", parent=event) - code_event.scope_distance = event.scope_distance await self.emit_event( code_event, context=f"{{module}} enumerated projects and found {{event.type}} at {project_url}" ) @@ -133,7 +132,6 @@ async def handle_namespace(self, namespace, event): "SOCIAL", parent=event, ) - social_event.scope_distance = event.scope_distance await self.emit_event( social_event, context=f'{{module}} found GitLab namespace ({{event.type}}) "{namespace_name}" at {namespace_url}', diff --git a/bbot/modules/httpx.py b/bbot/modules/httpx.py index eb0cd376f..deda243a0 100644 --- a/bbot/modules/httpx.py +++ b/bbot/modules/httpx.py @@ -44,7 +44,7 @@ class httpx(BaseModule): } ] - scope_distance_modifier = 1 + scope_distance_modifier = 2 _shuffle_incoming_queue = False _batch_size = 500 _priority = 2 @@ -72,8 +72,10 @@ async def filter_event(self, event): # scope filtering in_scope_only = self.config.get("in_scope_only", True) - safe_to_visit = "httpx-safe" in event.tags - if not safe_to_visit and (in_scope_only and not self.scan.in_scope(event)): + if "httpx-safe" in event.tags: + return True + max_scope_distance = 0 if in_scope_only else (self.scan.scope_search_distance + 1) + if event.scope_distance > max_scope_distance: return False, "event is not in scope" return True @@ -93,7 +95,6 @@ def make_url_metadata(self, event): return url, url_hash def _incoming_dedup_hash(self, event): - url, url_hash = self.make_url_metadata(event) return url_hash diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index 6efe4ff1f..fcf7e90af 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -65,22 +65,35 @@ async def filter_event(self, event): async def handle_event(self, event, **kwargs): dns_tags = set() - dns_children = dict() event_whitelisted = False event_blacklisted = False - emit_children = False - event_host = str(event.host) - event_host_hash = hash(str(event.host)) event_is_ip = self.helpers.is_ip(event.host) + event_host = str(event.host) + event_host_hash = hash(event_host) + + async with self._event_cache_locks.lock(event_host_hash): + # first thing we do is check for wildcards + if not event_is_ip: + if event.scope_distance <= self.scan.scope_search_distance: + await self.handle_wildcard_event(event) + + event_host = str(event.host) + event_host_hash = hash(event_host) + # we do DNS resolution inside a lock to make sure we don't duplicate work # once the resolution happens, it will be cached so it doesn't need to happen again async with self._event_cache_locks.lock(event_host_hash): try: # try to get from cache - dns_tags, dns_children, event_whitelisted, event_blacklisted = self._event_cache[event_host_hash] + # the "main host event" is the original parent IP_ADDRESS or DNS_NAME + main_host_event, dns_tags, event_whitelisted, event_blacklisted = self._event_cache[event_host_hash] + # dns_tags, dns_children, event_whitelisted, event_blacklisted = self._event_cache[event_host_hash] except KeyError: + + main_host_event = self.get_dns_parent(event) + rdtypes_to_resolve = () if event_is_ip: if not self.minimal: @@ -94,47 +107,55 @@ async def handle_event(self, event, **kwargs): # if missing from cache, do DNS resolution queries = [(event_host, rdtype) for rdtype in rdtypes_to_resolve] error_rdtypes = [] + raw_record_events = [] async for (query, rdtype), (answer, errors) in self.helpers.dns.resolve_raw_batch(queries): if self.emit_raw_records and rdtype not in ("A", "AAAA", "CNAME", "PTR"): - await self.emit_event( + raw_record_event = self.make_event( {"host": str(event_host), "type": rdtype, "answer": answer.to_text()}, "RAW_DNS_RECORD", - parent=event, + parent=main_host_event, tags=[f"{rdtype.lower()}-record"], context=f"{rdtype} lookup on {{event.parent.host}} produced {{event.type}}", ) + raw_record_events.append(raw_record_event) if errors: error_rdtypes.append(rdtype) for _rdtype, host in extract_targets(answer): dns_tags.add(f"{rdtype.lower()}-record") try: - dns_children[_rdtype].add(host) + main_host_event.dns_children[_rdtype].add(host) except KeyError: - dns_children[_rdtype] = {host} + main_host_event.dns_children[_rdtype] = {host} + # if there were dns resolution errors, notify the user with tags for rdtype in error_rdtypes: - if rdtype not in dns_children: + if rdtype not in main_host_event.dns_children: dns_tags.add(f"{rdtype.lower()}-error") - if not dns_children and not event_is_ip: + # if there weren't any DNS children and it's not an IP address, tag as unresolved + if not main_host_event.dns_children and not event_is_ip: dns_tags.add("unresolved") - for rdtype, children in dns_children.items(): + # check DNS children against whitelists and blacklists + for rdtype, children in main_host_event.dns_children.items(): if event_blacklisted: break for host in children: # whitelisting / blacklisting based on resolved hosts if rdtype in ("A", "AAAA", "CNAME"): # having a CNAME to an in-scope resource doesn't make you in-scope - if not event_whitelisted and rdtype != "CNAME": + if (not event_whitelisted) and rdtype != "CNAME": with suppress(ValidationError): if self.scan.whitelisted(host): event_whitelisted = True + dns_tags.add(f"dns-whitelisted-{rdtype.lower()}") # CNAME to a blacklisted resource, means you're blacklisted with suppress(ValidationError): if self.scan.blacklisted(host): dns_tags.add("blacklisted") + dns_tags.add(f"dns-blacklisted-{rdtype.lower()}") event_blacklisted = True + event_whitelisted = False break # check for private IPs @@ -145,125 +166,105 @@ async def handle_event(self, event, **kwargs): except ValueError: continue - # only emit DNS children if we haven't seen this host before - emit_children = (not self.minimal) and (event_host_hash not in self._event_cache) + # add DNS tags to main host + for tag in dns_tags: + main_host_event.add_tag(tag) + + # set resolved_hosts attribute + for rdtype, children in main_host_event.dns_children.items(): + if rdtype in ("A", "AAAA", "CNAME"): + for host in children: + main_host_event._resolved_hosts.add(host) + + # if we're not blacklisted, emit the main host event and all its raw records + if not event_blacklisted: + if event_whitelisted: + self.debug( + f"Making {main_host_event} in-scope because it resolves to an in-scope resource (A/AAAA)" + ) + main_host_event.scope_distance = 0 + await self.handle_wildcard_event(main_host_event) + + if event != main_host_event: + await self.emit_event(main_host_event) + for raw_record_event in raw_record_events: + await self.emit_event(raw_record_event) + + # kill runaway DNS chains + dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) + if dns_resolve_distance >= self.helpers.dns.runaway_limit: + self.debug( + f"Skipping DNS children for {event} because their DNS resolve distances would be greater than the configured value for this scan ({self.helpers.dns.runaway_limit})" + ) + main_host_event.dns_children = {} + + # emit DNS children + if not self.minimal: + in_dns_scope = -1 < event.scope_distance < self._dns_search_distance + for rdtype, records in main_host_event.dns_children.items(): + module = self.scan._make_dummy_module_dns(rdtype) + for record in records: + try: + child_event = self.scan.make_event( + record, "DNS_NAME", module=module, parent=main_host_event + ) + child_event.discovery_context = f"{rdtype} record for {event.host} contains {child_event.type}: {child_event.host}" + # if it's a hostname and it's only one hop away, mark it as affiliate + if child_event.type == "DNS_NAME" and child_event.scope_distance == 1: + child_event.add_tag("affiliate") + if in_dns_scope or self.preset.in_scope(child_event): + self.debug(f"Queueing DNS child for {event}: {child_event}") + await self.emit_event(child_event) + except ValidationError as e: + self.warning( + f'Event validation failed for DNS child of {main_host_event}: "{record}" ({rdtype}): {e}' + ) # store results in cache - self._event_cache[event_host_hash] = dns_tags, dns_children, event_whitelisted, event_blacklisted + self._event_cache[event_host_hash] = main_host_event, dns_tags, event_whitelisted, event_blacklisted # abort if the event resolves to something blacklisted if event_blacklisted: - event.add_tag("blacklisted") return False, f"it has a blacklisted DNS record" - # set resolved_hosts attribute - for rdtype, children in dns_children.items(): - if rdtype in ("A", "AAAA", "CNAME"): - for host in children: - event.resolved_hosts.add(host) - - # set dns_children attribute - event.dns_children = dns_children - # if the event resolves to an in-scope IP, set its scope distance to 0 if event_whitelisted: self.debug(f"Making {event} in-scope because it resolves to an in-scope resource") event.scope_distance = 0 + await self.handle_wildcard_event(event) - # check for wildcards, only if the event resolves to something that isn't an IP - if (not event_is_ip) and (dns_children): - if event.scope_distance <= self.scan.scope_search_distance: - await self.handle_wildcard_event(event) - - # kill runaway DNS chains - dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) - if dns_resolve_distance >= self.helpers.dns.runaway_limit: - self.debug( - f"Skipping DNS children for {event} because their DNS resolve distances would be greater than the configured value for this scan ({self.helpers.dns.runaway_limit})" - ) - dns_children = {} - - # if the event is a DNS_NAME or IP, tag with "a-record", "ptr-record", etc. - if event.type in ("DNS_NAME", "IP_ADDRESS"): - for tag in dns_tags: - event.add_tag(tag) + # transfer resolved hosts + event._resolved_hosts = main_host_event._resolved_hosts # If the event is unresolved, change its type to DNS_NAME_UNRESOLVED if event.type == "DNS_NAME" and "unresolved" in event.tags: event.type = "DNS_NAME_UNRESOLVED" - # speculate DNS_NAMES and IP_ADDRESSes from other event types - parent_event = event - if ( - event.host - and event.type not in ("DNS_NAME", "DNS_NAME_UNRESOLVED", "IP_ADDRESS", "IP_RANGE") - and not ((event.type in ("OPEN_TCP_PORT", "URL_UNVERIFIED") and str(event.module) == "speculate")) - ): - parent_event = self.scan.make_event( - event.host, - "DNS_NAME", - module=self.host_module, - parent=event, - context="{event.parent.type} has host {event.type}: {event.host}", - ) - # only emit the event if it's not already in the parent chain - if parent_event is not None and (parent_event.always_emit or parent_event not in event.get_parents()): - parent_event.scope_distance = event.scope_distance - if "target" in event.tags: - parent_event.add_tag("target") - await self.emit_event( - parent_event, - ) - - # emit DNS children - if emit_children: - in_dns_scope = -1 < event.scope_distance < self._dns_search_distance - dns_child_events = [] - if dns_children: - for rdtype, records in dns_children.items(): - module = self.scan._make_dummy_module_dns(rdtype) - for record in records: - try: - child_event = self.scan.make_event(record, "DNS_NAME", module=module, parent=parent_event) - child_event.discovery_context = ( - f"{rdtype} record for {event.host} contains {child_event.type}: {child_event.host}" - ) - # if it's a hostname and it's only one hop away, mark it as affiliate - if child_event.type == "DNS_NAME" and child_event.scope_distance == 1: - child_event.add_tag("affiliate") - if in_dns_scope or self.preset.in_scope(child_event): - dns_child_events.append(child_event) - except ValidationError as e: - self.warning( - f'Event validation failed for DNS child of {parent_event}: "{record}" ({rdtype}): {e}' - ) - for child_event in dns_child_events: - self.debug(f"Queueing DNS child for {event}: {child_event}") - await self.emit_event(child_event) - async def handle_wildcard_event(self, event): - self.debug(f"Entering handle_wildcard_event({event}, children={event.dns_children})") + self.debug(f"Entering handle_wildcard_event({event})") try: event_host = str(event.host) # check if the dns name itself is a wildcard entry wildcard_rdtypes = await self.helpers.is_wildcard(event_host) for rdtype, (is_wildcard, wildcard_host) in wildcard_rdtypes.items(): - wildcard_tag = "error" - if is_wildcard == True: + if is_wildcard == False: + continue + elif is_wildcard == True: event.add_tag("wildcard") wildcard_tag = "wildcard" + elif is_wildcard == None: + wildcard_tag = "error" + event.add_tag(f"{rdtype.lower()}-{wildcard_tag}") # wildcard event modification (www.evilcorp.com --> _wildcard.evilcorp.com) if wildcard_rdtypes and not "target" in event.tags: - # these are the rdtypes that successfully resolve - resolved_rdtypes = set([c.upper() for c in event.dns_children]) # these are the rdtypes that have wildcards wildcard_rdtypes_set = set(wildcard_rdtypes) # consider the event a full wildcard if all its records are wildcards event_is_wildcard = False - if resolved_rdtypes: - event_is_wildcard = all(r in wildcard_rdtypes_set for r in resolved_rdtypes) + if wildcard_rdtypes_set: + event_is_wildcard = all(r[0] == True for r in wildcard_rdtypes.values()) if event_is_wildcard: if event.type in ("DNS_NAME",) and not "_wildcard" in event.data.split("."): @@ -278,4 +279,23 @@ async def handle_wildcard_event(self, event): event.data = wildcard_data finally: - self.debug(f"Finished handle_wildcard_event({event}, children={event.dns_children})") + self.debug(f"Finished handle_wildcard_event({event})") + + def get_dns_parent(self, event): + """ + Get the first parent DNS_NAME / IP_ADDRESS of an event. If one isn't found, create it. + """ + for parent in event.get_parents(include_self=True): + if parent.host == event.host and parent.type in ("IP_ADDRESS", "DNS_NAME", "DNS_NAME_UNRESOLVED"): + return parent + tags = set() + if "target" in event.tags: + tags.add("target") + return self.scan.make_event( + event.host, + "DNS_NAME", + module=self.host_module, + parent=event, + context="{event.parent.type} has host {event.type}: {event.host}", + tags=tags, + ) diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index f3aa3ab70..f0286fd6b 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -252,8 +252,8 @@ async def report( event_data["description"] = f"{discovery_context} {yara_rule_settings['self.description']}" subject = "" if isinstance(event_data, str): - subject = f" event_data" - context = f"Excavate's [{self.__class__.__name__}] submodule emitted [{event_type}]{subject}, because {discovery_context} {yara_rule_settings.description}" + subject = f" {event_data}" + context = f"Excavate's {self.__class__.__name__} emitted {event_type}{subject}, because {discovery_context} {yara_rule_settings.description}" tags = yara_rule_settings.tags event_draft = await self.report_prep(event_data, event_type, event, tags, **kwargs) if event_draft: diff --git a/bbot/modules/internal/speculate.py b/bbot/modules/internal/speculate.py index 1578a08c9..bb73094ff 100644 --- a/bbot/modules/internal/speculate.py +++ b/bbot/modules/internal/speculate.py @@ -187,8 +187,6 @@ async def handle_event(self, event): self.org_stubs_seen.add(stub_hash) stub_event = self.make_event(stub, "ORG_STUB", parent=event) if stub_event: - if event.scope_distance > 0: - stub_event.scope_distance = event.scope_distance await self.emit_event(stub_event, context="speculated {event.type}: {event.data}") # USERNAME --> EMAIL @@ -197,5 +195,4 @@ async def handle_event(self, event): if validators.soft_validate(email, "email"): email_event = self.make_event(email, "EMAIL_ADDRESS", parent=event, tags=["affiliate"]) if email_event: - email_event.scope_distance = event.scope_distance await self.emit_event(email_event, context="detected {event.type}: {event.data}") diff --git a/bbot/modules/ipneighbor.py b/bbot/modules/ipneighbor.py index 3aab345f2..658383258 100644 --- a/bbot/modules/ipneighbor.py +++ b/bbot/modules/ipneighbor.py @@ -37,8 +37,6 @@ async def handle_event(self, event): if ip != main_ip: ip_event = self.make_event(str(ip), "IP_ADDRESS", event, internal=True) if ip_event: - # keep the scope distance low to give it one more hop for DNS resolution - # ip_event.scope_distance = max(1, event.scope_distance) await self.emit_event( ip_event, context="{module} produced {event.type}: {event.data}", diff --git a/bbot/modules/output/neo4j.py b/bbot/modules/output/neo4j.py index 0fd6477d1..bb7c9e5c4 100644 --- a/bbot/modules/output/neo4j.py +++ b/bbot/modules/output/neo4j.py @@ -1,3 +1,4 @@ +import json from contextlib import suppress from neo4j import AsyncGraphDatabase @@ -53,7 +54,6 @@ async def setup(self): return True async def handle_batch(self, *all_events): - await self.helpers.sleep(5) # group events by type, since cypher doesn't allow dynamic labels events_by_type = {} parents_by_type = {} @@ -87,7 +87,7 @@ async def handle_batch(self, *all_events): src_id = all_ids[parent.id] dst_id = all_ids[event.id] except KeyError as e: - self.critical(f'Error "{e}" correlating {parent.id}:{parent.data} --> {event.id}:{event.data}') + self.error(f'Error "{e}" correlating {parent.id}:{parent.data} --> {event.id}:{event.data}') continue rel_ids.append((src_id, module, timestamp, dst_id)) @@ -103,21 +103,28 @@ async def merge_events(self, events, event_type, id_only=False): # we pop the timestamp because it belongs on the relationship event_json.pop("timestamp") # nested data types aren't supported in neo4j - event_json.pop("dns_children", None) + for key in ("dns_children", "discovery_path"): + if key in event_json: + event_json[key] = json.dumps(event_json[key]) insert_data.append(event_json) cypher = f"""UNWIND $events AS event MERGE (_:{event_type} {{ id: event.id }}) SET _ += event RETURN event.data as event_data, event.id as event_id, elementId(_) as neo4j_id""" - # insert events - results = await self.session.run(cypher, events=insert_data) - # get Neo4j ids neo4j_ids = {} - for result in await results.data(): - event_id = result["event_id"] - neo4j_id = result["neo4j_id"] - neo4j_ids[event_id] = neo4j_id + # insert events + try: + results = await self.session.run(cypher, events=insert_data) + # get Neo4j ids + for result in await results.data(): + event_id = result["event_id"] + neo4j_id = result["neo4j_id"] + neo4j_ids[event_id] = neo4j_id + except Exception as e: + self.error(f"Error inserting Neo4j nodes (label:{event_type}): {e}") + self.trace(insert_data) + self.trace(cypher) return neo4j_ids async def merge_relationships(self, relationships): @@ -138,7 +145,11 @@ async def merge_relationships(self, relationships): MATCH (b) WHERE elementId(b) = rel.dst_id MERGE (a)-[_:{module}]->(b) SET _.timestamp = rel.timestamp""" - await self.session.run(cypher, rels=rels) + try: + await self.session.run(cypher, rels=rels) + except Exception as e: + self.error(f"Error inserting Neo4j relationship (label:{module}): {e}") + self.trace(cypher) async def cleanup(self): with suppress(Exception): diff --git a/bbot/modules/postman.py b/bbot/modules/postman.py index e736bec1a..e4d8895db 100644 --- a/bbot/modules/postman.py +++ b/bbot/modules/postman.py @@ -23,9 +23,6 @@ class postman(subdomain_enum): reject_wildcards = False - # wait until outgoing queue is empty to help avoid rate limits - _qsize = 1 - async def handle_event(self, event): query = self.make_query(event) self.verbose(f"Searching for any postman workspaces, collections, requests belonging to {query}") diff --git a/bbot/modules/social.py b/bbot/modules/social.py index b80f6c18a..0c834cd7f 100644 --- a/bbot/modules/social.py +++ b/bbot/modules/social.py @@ -42,13 +42,15 @@ async def handle_event(self, event): url = url.lower() profile_name = profile_name.lower() url = f"https://{url}" - social_event = self.make_event( - {"platform": platform, "url": url, "profile_name": profile_name}, - "SOCIAL", - parent=event, - ) - social_event.scope_distance = event.scope_distance - await self.emit_event( - social_event, - context=f"{{module}} detected {platform} {{event.type}} at {url}", - ) + event_data = {"platform": platform, "url": url, "profile_name": profile_name} + # only emit if the same event isn't already in the parent chain + if not any([e.type == "SOCIAL" and e.data == event_data for e in event.get_parents()]): + social_event = self.make_event( + event_data, + "SOCIAL", + parent=event, + ) + await self.emit_event( + social_event, + context=f"{{module}} detected {platform} {{event.type}} at {url}", + ) diff --git a/bbot/modules/templates/subdomain_enum.py b/bbot/modules/templates/subdomain_enum.py index 95c7995d3..28f775d2a 100644 --- a/bbot/modules/templates/subdomain_enum.py +++ b/bbot/modules/templates/subdomain_enum.py @@ -20,9 +20,10 @@ class subdomain_enum(BaseModule): # whether to reject wildcard DNS_NAMEs reject_wildcards = "strict" - # set qsize to 1. this helps combat rate limiting by ensuring that a query doesn't execute + # set qsize to 10. this helps combat rate limiting by ensuring that a query doesn't execute # until the queue is ready to receive its results - _qsize = 1 + # we don't use 1 because it causes delays due to the asyncio.sleep; 10 gives us reasonable buffer room + _qsize = 10 # how to deduplicate incoming events # options: diff --git a/bbot/modules/unstructured.py b/bbot/modules/unstructured.py index 06118a348..4143ea2fd 100644 --- a/bbot/modules/unstructured.py +++ b/bbot/modules/unstructured.py @@ -94,7 +94,6 @@ async def handle_event(self, event): file_event = self.make_event( {"path": str(file_path)}, "FILESYSTEM", tags=["parsed_folder", "file"], parent=event ) - file_event.scope_distance = event.scope_distance await self.emit_event(file_event) elif "file" in event.tags: file_path = event.data["path"] diff --git a/bbot/modules/wpscan.py b/bbot/modules/wpscan.py index 382bd2606..60f247af4 100644 --- a/bbot/modules/wpscan.py +++ b/bbot/modules/wpscan.py @@ -166,7 +166,6 @@ def parse_wp_misc(self, interesting_json, base_url, source_event): else: url_event = self.make_event(url, "URL_UNVERIFIED", parent=source_event, tags=["httpx-safe"]) if url_event: - url_event.scope_distance = source_event.scope_distance yield url_event yield self.make_event( {"description": description_string, "url": url, "host": str(source_event.host)}, @@ -228,7 +227,6 @@ def parse_wp_plugins(self, plugins_json, base_url, source_event): if url != base_url: url_event = self.make_event(url, "URL_UNVERIFIED", parent=source_event, tags=["httpx-safe"]) if url_event: - url_event.scope_distance = source_event.scope_distance yield url_event version = plugin.get("version", {}).get("number", "") if version: diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index ad722f4fc..cdae044a8 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -1,11 +1,8 @@ import asyncio -import logging from contextlib import suppress from bbot.modules.base import InterceptModule -log = logging.getLogger("bbot.scanner.manager") - class ScanIngress(InterceptModule): """ @@ -109,10 +106,11 @@ async def handle_event(self, event, **kwargs): # Scope shepherding # here is where we make sure in-scope events are set to their proper scope distance - event_whitelisted = self.scan.whitelisted(event) - if event.host and event_whitelisted: - log.debug(f"Making {event} in-scope because it matches the scan target") - event.scope_distance = 0 + if event.host: + event_whitelisted = self.scan.whitelisted(event) + if event_whitelisted: + self.debug(f"Making {event} in-scope because its main host matches the scan target") + event.scope_distance = 0 # nerf event's priority if it's not in scope event.module_priority += event.scope_distance @@ -203,13 +201,13 @@ async def handle_event(self, event, **kwargs): event_will_be_output = event.always_emit or event_in_report_distance if not event_will_be_output: - log.debug( + self.debug( f"Making {event} internal because its scope_distance ({event.scope_distance}) > scope_report_distance ({self.scan.scope_report_distance})" ) event.internal = True if event.type in self.scan.omitted_event_types: - log.debug(f"Omitting {event} because its type is omitted in the config") + self.debug(f"Omitting {event} because its type is omitted in the config") event._omit = True # if we discovered something interesting from an internal event, @@ -223,7 +221,7 @@ async def handle_event(self, event, **kwargs): parent.internal = False if not parent._graph_important: parent._graph_important = True - log.debug(f"Re-queuing internal event {parent} with parent {event} to prevent graph orphan") + self.debug(f"Re-queuing internal event {parent} with parent {event} to prevent graph orphan") await self.emit_event(parent) abort_result = False diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 0fe4191bf..fa2abfadf 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -172,10 +172,6 @@ def __init__( self.dispatcher = dispatcher self.dispatcher.set_scan(self) - from .stats import ScanStats - - self.stats = ScanStats(self) - # scope distance self.scope_config = self.config.get("scope", {}) self.scope_search_distance = max(0, int(self.scope_config.get("search_distance", 0))) @@ -215,6 +211,10 @@ def __init__( # how often to print scan status self.status_frequency = self.config.get("status_frequency", 15) + from .stats import ScanStats + + self.stats = ScanStats(self) + self._prepped = False self._finished_init = False self._new_activity = False @@ -642,11 +642,11 @@ def modules_status(self, _log=False): num_queued_events = self.num_queued_events if num_queued_events: self.info( - f"{self.name}: {num_queued_events:,} events in queue ({self.stats.speedometer.speed:,} processed in the past minute)" + f"{self.name}: {num_queued_events:,} events in queue ({self.stats.speedometer.speed:,} processed in the past {self.status_frequency} seconds)" ) else: self.info( - f"{self.name}: No events in queue ({self.stats.speedometer.speed:,} processed in the past minute)" + f"{self.name}: No events in queue ({self.stats.speedometer.speed:,} processed in the past {self.status_frequency} seconds)" ) if self.log_level <= logging.DEBUG: diff --git a/bbot/scanner/stats.py b/bbot/scanner/stats.py index 6ae86c044..38d95032f 100644 --- a/bbot/scanner/stats.py +++ b/bbot/scanner/stats.py @@ -41,7 +41,7 @@ def __init__(self, scan): self.scan = scan self.module_stats = {} self.events_emitted_by_type = {} - self.speedometer = SpeedCounter(60) + self.speedometer = SpeedCounter(scan.status_frequency) def event_produced(self, event): _increment(self.events_emitted_by_type, event.type) diff --git a/bbot/test/conftest.py b/bbot/test/conftest.py index 161f1c3a3..7cccc950d 100644 --- a/bbot/test/conftest.py +++ b/bbot/test/conftest.py @@ -218,80 +218,81 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config): # pragma: no print(f"{RED}Failure details:\n{item.longreprtext}{RESET}") -import psutil -import traceback -import inspect - - -def _print_detailed_info(): # pragma: no cover - """ - Debugging pytests hanging - """ - print("=== Detailed Thread and Process Information ===\n") - try: - print("=== Threads ===") - for thread in threading.enumerate(): - print(f"Thread Name: {thread.name}") - print(f"Thread ID: {thread.ident}") - print(f"Is Alive: {thread.is_alive()}") - print(f"Daemon: {thread.daemon}") - - if hasattr(thread, "_target"): - target = thread._target - if target: - qualname = ( - f"{target.__module__}.{target.__qualname__}" - if hasattr(target, "__qualname__") - else str(target) - ) - print(f"Target Function: {qualname}") - - if hasattr(thread, "_args"): - args = thread._args - kwargs = thread._kwargs if hasattr(thread, "_kwargs") else {} - arg_spec = inspect.getfullargspec(target) - - all_args = list(args) + [f"{k}={v}" for k, v in kwargs.items()] - - if inspect.ismethod(target) and arg_spec.args[0] == "self": - arg_spec.args.pop(0) - - named_args = list(zip(arg_spec.args, all_args)) - if arg_spec.varargs: - named_args.extend((f"*{arg_spec.varargs}", arg) for arg in all_args[len(arg_spec.args) :]) - - print("Arguments:") - for name, value in named_args: - print(f" {name}: {value}") - else: - print("Target Function: None") - else: - print("Target Function: Unknown") - - print() - - print("=== Processes ===") - current_process = psutil.Process() - for child in current_process.children(recursive=True): - print(f"Process ID: {child.pid}") - print(f"Name: {child.name()}") - print(f"Status: {child.status()}") - print(f"CPU Times: {child.cpu_times()}") - print(f"Memory Info: {child.memory_info()}") - print() - - print("=== Current Process ===") - print(f"Process ID: {current_process.pid}") - print(f"Name: {current_process.name()}") - print(f"Status: {current_process.status()}") - print(f"CPU Times: {current_process.cpu_times()}") - print(f"Memory Info: {current_process.memory_info()}") - print() - - except Exception as e: - print(f"An error occurred: {str(e)}") - print("Traceback:") - traceback.print_exc() +# BELOW: debugging for frozen/hung tests +# import psutil +# import traceback +# import inspect + + +# def _print_detailed_info(): # pragma: no cover +# """ +# Debugging pytests hanging +# """ +# print("=== Detailed Thread and Process Information ===\n") +# try: +# print("=== Threads ===") +# for thread in threading.enumerate(): +# print(f"Thread Name: {thread.name}") +# print(f"Thread ID: {thread.ident}") +# print(f"Is Alive: {thread.is_alive()}") +# print(f"Daemon: {thread.daemon}") + +# if hasattr(thread, "_target"): +# target = thread._target +# if target: +# qualname = ( +# f"{target.__module__}.{target.__qualname__}" +# if hasattr(target, "__qualname__") +# else str(target) +# ) +# print(f"Target Function: {qualname}") + +# if hasattr(thread, "_args"): +# args = thread._args +# kwargs = thread._kwargs if hasattr(thread, "_kwargs") else {} +# arg_spec = inspect.getfullargspec(target) + +# all_args = list(args) + [f"{k}={v}" for k, v in kwargs.items()] + +# if inspect.ismethod(target) and arg_spec.args[0] == "self": +# arg_spec.args.pop(0) + +# named_args = list(zip(arg_spec.args, all_args)) +# if arg_spec.varargs: +# named_args.extend((f"*{arg_spec.varargs}", arg) for arg in all_args[len(arg_spec.args) :]) + +# print("Arguments:") +# for name, value in named_args: +# print(f" {name}: {value}") +# else: +# print("Target Function: None") +# else: +# print("Target Function: Unknown") + +# print() + +# print("=== Processes ===") +# current_process = psutil.Process() +# for child in current_process.children(recursive=True): +# print(f"Process ID: {child.pid}") +# print(f"Name: {child.name()}") +# print(f"Status: {child.status()}") +# print(f"CPU Times: {child.cpu_times()}") +# print(f"Memory Info: {child.memory_info()}") +# print() + +# print("=== Current Process ===") +# print(f"Process ID: {current_process.pid}") +# print(f"Name: {current_process.name()}") +# print(f"Status: {current_process.status()}") +# print(f"CPU Times: {current_process.cpu_times()}") +# print(f"Memory Info: {current_process.memory_info()}") +# print() + +# except Exception as e: +# print(f"An error occurred: {str(e)}") +# print("Traceback:") +# traceback.print_exc() @pytest.hookimpl(tryfirst=True, hookwrapper=True) @@ -309,11 +310,11 @@ def pytest_sessionfinish(session, exitstatus): yield # temporarily suspend stdout capture and print detailed thread info - capmanager = session.config.pluginmanager.get_plugin("capturemanager") - if capmanager: - capmanager.suspend_global_capture(in_=True) + # capmanager = session.config.pluginmanager.get_plugin("capturemanager") + # if capmanager: + # capmanager.suspend_global_capture(in_=True) - _print_detailed_info() + # _print_detailed_info() - if capmanager: - capmanager.resume_global_capture() + # if capmanager: + # capmanager.resume_global_capture() diff --git a/bbot/test/test.conf b/bbot/test/test.conf index 928137b8e..8ae91bcf3 100644 --- a/bbot/test/test.conf +++ b/bbot/test/test.conf @@ -12,8 +12,6 @@ modules: websocket: url: ws://127.0.0.1/ws:11111 token: asdf - neo4j: - uri: bolt://127.0.0.1:11111 web: http_proxy: http_headers: { "test": "header" } diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index e38db4fe5..b2bcb68fe 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -155,9 +155,11 @@ async def test_dns_resolution(bbot_scanner): assert hash(resolved_hosts_event2.host) in dnsresolve._event_cache await dnsresolve.handle_event(resolved_hosts_event2) assert "1.1.1.1" in resolved_hosts_event2.resolved_hosts - assert "1.1.1.1" in resolved_hosts_event2.dns_children["A"] + # URL event should not have dns_children + assert not resolved_hosts_event2.dns_children assert resolved_hosts_event1.resolved_hosts == resolved_hosts_event2.resolved_hosts - assert resolved_hosts_event1.dns_children == resolved_hosts_event2.dns_children + # DNS_NAME event should have dns_children + assert "1.1.1.1" in resolved_hosts_event1.dns_children["A"] assert "a-record" in resolved_hosts_event1.tags assert not "a-record" in resolved_hosts_event2.tags @@ -213,9 +215,12 @@ async def test_wildcards(bbot_scanner): assert not hash("asdf.asdf.github.io") in dnsengine._wildcard_cache assert not hash("asdf.asdf.asdf.github.io") in dnsengine._wildcard_cache assert len(dnsengine._wildcard_cache[hash("github.io")]) > 0 - wildcard_event1 = scan.make_event("wat.asdf.fdsa.github.io", "DNS_NAME", dummy=True) - wildcard_event2 = scan.make_event("wats.asd.fdsa.github.io", "DNS_NAME", dummy=True) - wildcard_event3 = scan.make_event("github.io", "DNS_NAME", dummy=True) + wildcard_event1 = scan.make_event("wat.asdf.fdsa.github.io", "DNS_NAME", parent=scan.root_event) + wildcard_event1.scope_distance = 0 + wildcard_event2 = scan.make_event("wats.asd.fdsa.github.io", "DNS_NAME", parent=scan.root_event) + wildcard_event2.scope_distance = 0 + wildcard_event3 = scan.make_event("github.io", "DNS_NAME", parent=scan.root_event) + wildcard_event3.scope_distance = 0 await dnsengine._shutdown() @@ -427,3 +432,31 @@ async def handle_event(self, event): and e.discovery_context == "TXT lookup on one.one.one.one produced RAW_DNS_RECORD" ] ) + + +@pytest.mark.asyncio +async def test_dns_graph_structure(bbot_scanner): + scan = bbot_scanner("https://evilcorp.com", config={"dns": {"search_distance": 1, "minimal": False}}) + await scan.helpers.dns._mock_dns( + { + "evilcorp.com": { + "CNAME": [ + "www.evilcorp.com", + ] + }, + "www.evilcorp.com": {"CNAME": ["test.evilcorp.com"]}, + "test.evilcorp.com": {"A": ["127.0.0.1"]}, + } + ) + events = [e async for e in scan.async_start()] + assert len(events) == 5 + non_scan_events = [e for e in events if e.type != "SCAN"] + assert sorted([e.type for e in non_scan_events]) == ["DNS_NAME", "DNS_NAME", "DNS_NAME", "URL_UNVERIFIED"] + events_by_data = {e.data: e for e in non_scan_events} + assert set(events_by_data) == {"https://evilcorp.com/", "evilcorp.com", "www.evilcorp.com", "test.evilcorp.com"} + assert events_by_data["test.evilcorp.com"].parent.data == "www.evilcorp.com" + assert str(events_by_data["test.evilcorp.com"].module) == "CNAME" + assert events_by_data["www.evilcorp.com"].parent.data == "evilcorp.com" + assert str(events_by_data["www.evilcorp.com"].module) == "CNAME" + assert events_by_data["evilcorp.com"].parent.data == "https://evilcorp.com/" + assert str(events_by_data["evilcorp.com"].module) == "host" diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 768533311..4f42c1bb0 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -171,7 +171,7 @@ async def test_events(events, helpers): # scope distance event1 = scan.make_event("1.2.3.4", dummy=True) - assert event1._scope_distance == -1 + assert event1._scope_distance == None event1.scope_distance = 0 assert event1._scope_distance == 0 event2 = scan.make_event("2.3.4.5", parent=event1) @@ -197,6 +197,7 @@ async def test_events(events, helpers): # internal event tracking root_event = scan.make_event("0.0.0.0", dummy=True) + root_event.scope_distance = 0 internal_event1 = scan.make_event("1.2.3.4", parent=root_event, internal=True) assert internal_event1._internal == True assert "internal" in internal_event1.tags @@ -475,6 +476,7 @@ async def test_events(events, helpers): event_5 = scan.make_event("127.0.0.5", parent=event_4) assert event_5.get_parents() == [event_4, event_3, event_2, event_1, scan.root_event] assert event_5.get_parents(omit=True) == [event_4, event_2, event_1, scan.root_event] + assert event_5.get_parents(include_self=True) == [event_5, event_4, event_3, event_2, event_1, scan.root_event] # test host backup host_event = scan.make_event("asdf.evilcorp.com", "DNS_NAME", parent=scan.root_event) @@ -680,3 +682,78 @@ async def handle_event(self, event): assert blsops_event[0].discovery_path[1][-1] == "URL_UNVERIFIED has host DNS_NAME: blacklanternsecurity.com" await scan._cleanup() + + +@pytest.mark.asyncio +async def test_event_web_spider_distance(bbot_scanner): + # make sure web spider distance inheritance works as intended + # and we don't have any runaway situations with SOCIAL events + URLs + scan = bbot_scanner(config={"web": {"spider_distance": 1}}) + url_event_1 = scan.make_event("http://www.evilcorp.com/test1", "URL_UNVERIFIED", parent=scan.root_event) + assert url_event_1.web_spider_distance == 0 + url_event_2 = scan.make_event("http://www.evilcorp.com/test2", "URL_UNVERIFIED", parent=url_event_1) + assert url_event_2.web_spider_distance == 0 + url_event_3 = scan.make_event( + "http://www.evilcorp.com/test3", "URL_UNVERIFIED", parent=url_event_2, tags=["spider-danger"] + ) + assert url_event_3.web_spider_distance == 1 + assert "spider-danger" in url_event_3.tags + assert not "spider-max" in url_event_3.tags + social_event = scan.make_event( + {"platform": "github", "url": "http://www.evilcorp.com/test4"}, "SOCIAL", parent=url_event_3 + ) + assert social_event.web_spider_distance == 1 + assert "spider-danger" in social_event.tags + url_event_4 = scan.make_event("http://www.evilcorp.com/test4", "URL_UNVERIFIED", parent=social_event) + assert url_event_4.web_spider_distance == 2 + assert "spider-danger" in url_event_4.tags + assert "spider-max" in url_event_4.tags + social_event_2 = scan.make_event( + {"platform": "github", "url": "http://www.evilcorp.com/test5"}, "SOCIAL", parent=url_event_4 + ) + assert social_event_2.web_spider_distance == 2 + assert "spider-danger" in social_event_2.tags + assert "spider-max" in social_event_2.tags + url_event_5 = scan.make_event("http://www.evilcorp.com/test5", "URL_UNVERIFIED", parent=social_event_2) + assert url_event_5.web_spider_distance == 3 + assert "spider-danger" in url_event_5.tags + assert "spider-max" in url_event_5.tags + + url_event = scan.make_event("http://www.evilcorp.com", "URL_UNVERIFIED", parent=scan.root_event) + assert url_event.web_spider_distance == 0 + assert not "spider-danger" in url_event.tags + assert not "spider-max" in url_event.tags + url_event_2 = scan.make_event( + "http://www.evilcorp.com", "URL_UNVERIFIED", parent=scan.root_event, tags="spider-danger" + ) + # spider distance shouldn't increment because it's not the same host + assert url_event_2.web_spider_distance == 0 + assert "spider-danger" in url_event_2.tags + assert not "spider-max" in url_event_2.tags + url_event_3 = scan.make_event( + "http://www.evilcorp.com/3", "URL_UNVERIFIED", parent=url_event_2, tags="spider-danger" + ) + assert url_event_3.web_spider_distance == 1 + assert "spider-danger" in url_event_3.tags + assert not "spider-max" in url_event_3.tags + url_event_4 = scan.make_event("http://evilcorp.com", "URL_UNVERIFIED", parent=url_event_3) + assert url_event_4.web_spider_distance == 0 + assert not "spider-danger" in url_event_4.tags + assert not "spider-max" in url_event_4.tags + url_event_4.add_tag("spider-danger") + assert url_event_4.web_spider_distance == 0 + assert "spider-danger" in url_event_4.tags + assert not "spider-max" in url_event_4.tags + url_event_4.remove_tag("spider-danger") + assert url_event_4.web_spider_distance == 0 + assert not "spider-danger" in url_event_4.tags + assert not "spider-max" in url_event_4.tags + url_event_5 = scan.make_event("http://evilcorp.com/5", "URL_UNVERIFIED", parent=url_event_4) + assert url_event_5.web_spider_distance == 0 + assert not "spider-danger" in url_event_5.tags + assert not "spider-max" in url_event_5.tags + url_event_5.add_tag("spider-danger") + # if host is the same as parent, web spider distance should auto-increment after adding spider-danger tag + assert url_event_5.web_spider_distance == 1 + assert "spider-danger" in url_event_5.tags + assert not "spider-max" in url_event_5.tags diff --git a/bbot/test/test_step_1/test_manager_scope_accuracy.py b/bbot/test/test_step_1/test_manager_scope_accuracy.py index 3ab9b0f01..bef9b13e6 100644 --- a/bbot/test/test_step_1/test_manager_scope_accuracy.py +++ b/bbot/test/test_step_1/test_manager_scope_accuracy.py @@ -348,7 +348,7 @@ def custom_setup(scan): assert 1 == len([e for e in all_events if e.type == "URL" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1 and "spider-max" in e.tags and "spider-danger" in e.tags]) + assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1 and "spider-danger" in e.tags]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal == False and e.scope_distance == 1]) assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == True and e.scope_distance == 1]) @@ -361,7 +361,7 @@ def custom_setup(scan): assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1 and "spider-max" in e.tags and "spider-danger" in e.tags]) + assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1 and "spider-danger" in e.tags]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal == False and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == True and e.scope_distance == 1]) @@ -383,6 +383,7 @@ def custom_setup(scan): events, all_events, all_events_nodups, graph_output_events, graph_output_batch_events = await do_scan( "127.0.0.1/31", modules=["httpx"], + output_modules=["neo4j"], _config={ "dns": {"minimal": False, "search_distance": 2}, "scope": {"search_distance": 0, "report_distance": 1}, @@ -393,7 +394,12 @@ def custom_setup(scan): }, ) - assert len(events) == 8 + assert len(events) == 7 + # 2024-08-01 + # Removed OPEN_TCP_PORT("127.0.0.77:8888") + # before, this event was speculated off the URL_UNVERIFIED, and that's what was used by httpx to generate the URL. it was graph-important. + # now for whatever reason, httpx is visiting the url directly and the open port isn't being used + # I don't know what changed exactly, but it doesn't matter, either way is equally valid and bbot is meant to be flexible this way. assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == False and e.scope_distance == 0]) @@ -404,13 +410,13 @@ def custom_setup(scan): assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.1:8888/"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/"]) assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal == False and e.scope_distance == 1]) - assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888"]) assert 1 == len([e for e in events if e.type == "URL" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.77:8888"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/"]) - assert len(all_events) == 19 + assert len(all_events) == 18 assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 0]) assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == False and e.scope_distance == 0]) @@ -421,9 +427,9 @@ def custom_setup(scan): assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal == False and e.scope_distance == 1]) - assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events if e.type == "URL" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.88:8888/" and e.internal == True and e.scope_distance == 2]) @@ -436,16 +442,16 @@ def custom_setup(scan): assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1 and "spider-max" in e.tags and "spider-danger" in e.tags]) + assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1 and "spider-danger" in e.tags]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal == False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.88:8888/" and e.internal == True and e.scope_distance == 2]) for _graph_output_events in (graph_output_events, graph_output_batch_events): - assert len(_graph_output_events) == 8 + assert len(_graph_output_events) == 7 assert 1 == len([e for e in _graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == False and e.scope_distance == 0]) @@ -454,11 +460,11 @@ def custom_setup(scan): assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888"]) assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.1:8888/"]) - assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and "spider-max" in e.tags and "spider-danger" in e.tags]) + assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and "spider-danger" in e.tags]) assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal == False and e.scope_distance == 1]) - assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888"]) assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) - assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.77:8888"]) + assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/"]) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.88:8888/"]) @@ -476,7 +482,7 @@ def custom_setup(scan): }, ) - assert len(events) == 8 + assert len(events) == 7 assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == False and e.scope_distance == 0]) @@ -487,13 +493,13 @@ def custom_setup(scan): assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.1:8888/"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/"]) assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal == False and e.scope_distance == 1]) - assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888"]) assert 1 == len([e for e in events if e.type == "URL" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) - assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.77:8888"]) + assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/"]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/"]) - assert len(all_events) == 24 + assert len(all_events) == 23 assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0" and e.internal == True and e.scope_distance == 0]) assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == False and e.scope_distance == 0]) @@ -502,16 +508,16 @@ def custom_setup(scan): assert 1 == len([e for e in all_events if e.type == "URL" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1 and "spider-max" in e.tags and "spider-danger" in e.tags]) + assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1 and "spider-danger" in e.tags]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal == False and e.scope_distance == 1]) - assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events if e.type == "URL" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.88:8888/" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.88:8888" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events if e.type == "URL" and e.data == "http://127.0.0.88:8888/" and e.internal == True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.88:8888" and e.internal == True and e.scope_distance == 2]) + assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.88:8888/" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.99:8888/" and e.internal == True and e.scope_distance == 3]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.99" and e.internal == True and e.scope_distance == 3]) @@ -524,21 +530,21 @@ def custom_setup(scan): assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.1:8888" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.1:8888/" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1 and "spider-max" in e.tags and "spider-danger" in e.tags]) + assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1 and "spider-danger" in e.tags]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal == False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.88:8888/" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.88" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.88:8888" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.data == "http://127.0.0.88:8888/" and e.internal == True and e.scope_distance == 2]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.88:8888" and e.internal == True and e.scope_distance == 2]) + assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.88:8888/" and e.internal == True and e.scope_distance == 2]) assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.99:8888/" and e.internal == True and e.scope_distance == 3]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.99" and e.internal == True and e.scope_distance == 3]) for _graph_output_events in (graph_output_events, graph_output_batch_events): - assert len(_graph_output_events) == 8 + assert len(_graph_output_events) == 7 assert 1 == len([e for e in _graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.0/31" and e.internal == False and e.scope_distance == 0]) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.0"]) assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.1" and e.internal == False and e.scope_distance == 0]) @@ -549,9 +555,9 @@ def custom_setup(scan): assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.1:8888/"]) assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.77:8888/"]) assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.77" and e.internal == False and e.scope_distance == 1]) - assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888" and e.internal == False and e.scope_distance == 1]) + assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.77:8888"]) assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.data == "http://127.0.0.77:8888/" and e.internal == False and e.scope_distance == 1]) - assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.77:8888"]) + assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.77:8888/"]) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.88"]) assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.88:8888/"]) @@ -571,7 +577,7 @@ def custom_setup(scan): }, ) - assert len(events) == 11 + assert len(events) == 9 assert 1 == len([e for e in events if e.type == "IP_RANGE" and e.data == "127.0.0.110/31" and e.internal == False and e.scope_distance == 0]) assert 0 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.110"]) assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.111" and e.internal == False and e.scope_distance == 0]) @@ -585,9 +591,9 @@ def custom_setup(scan): assert 0 == len([e for e in events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.33:8889/"]) assert 1 == len([e for e in events if e.type == "IP_ADDRESS" and e.data == "127.0.0.33" and e.internal == False and e.scope_distance == 0]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8888"]) - assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889" and e.internal == False and e.scope_distance == 0]) + assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8888"]) - assert 1 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889" and e.internal == False and e.scope_distance == 0]) + assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889"]) assert 1 == len([e for e in events if e.type == "URL" and e.data == "http://127.0.0.222:8889/" and e.internal == False and e.scope_distance == 0]) assert 0 == len([e for e in events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.222:8889"]) assert 1 == len([e for e in events if e.type == "URL" and e.data == "http://127.0.0.33:8889/" and e.internal == False and e.scope_distance == 0]) @@ -599,7 +605,9 @@ def custom_setup(scan): assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.44:8888"]) assert 0 == len([e for e in events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.55:8888"]) - assert len(all_events) == 31 + assert len(all_events) == 29 + for e in all_events: + log.critical(e) assert 1 == len([e for e in all_events if e.type == "IP_RANGE" and e.data == "127.0.0.110/31" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.110" and e.internal == True and e.scope_distance == 0]) assert 2 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.111" and e.internal == False and e.scope_distance == 0]) @@ -613,13 +621,13 @@ def custom_setup(scan): assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.33:8889/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.33" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8888" and e.internal == True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889" and e.internal == True and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8888" and e.internal == True and e.scope_distance == 0]) - assert 2 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in all_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889" and e.internal == True and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "URL" and e.data == "http://127.0.0.222:8889/" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.222:8889" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.222:8889/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "URL" and e.data == "http://127.0.0.33:8889/" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.33:8889" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in all_events if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.33:8889/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.44:8888/" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.44" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.55:8888/" and e.internal == True and e.scope_distance == 1]) @@ -641,13 +649,13 @@ def custom_setup(scan): assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.33:8889/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.33" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8888" and e.internal == True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889" and e.internal == True and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8888" and e.internal == True and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889" and e.internal == True and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.data == "http://127.0.0.222:8889/" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.222:8889" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.222:8889/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "URL" and e.data == "http://127.0.0.33:8889/" and e.internal == False and e.scope_distance == 0]) - assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.33:8889" and e.internal == False and e.scope_distance == 0]) + assert 1 == len([e for e in all_events_nodups if e.type == "HTTP_RESPONSE" and e.data["url"] == "http://127.0.0.33:8889/" and e.internal == False and e.scope_distance == 0]) assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.44:8888/" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "IP_ADDRESS" and e.data == "127.0.0.44" and e.internal == True and e.scope_distance == 1]) assert 1 == len([e for e in all_events_nodups if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.55:8888/" and e.internal == True and e.scope_distance == 1]) @@ -656,7 +664,7 @@ def custom_setup(scan): assert 1 == len([e for e in all_events_nodups if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.55:8888" and e.internal == True and e.scope_distance == 1]) for _graph_output_events in (graph_output_events, graph_output_batch_events): - assert len(_graph_output_events) == 11 + assert len(_graph_output_events) == 9 assert 1 == len([e for e in _graph_output_events if e.type == "IP_RANGE" and e.data == "127.0.0.110/31" and e.internal == False and e.scope_distance == 0]) assert 0 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.110"]) assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.111" and e.internal == False and e.scope_distance == 0]) @@ -670,9 +678,9 @@ def custom_setup(scan): assert 0 == len([e for e in _graph_output_events if e.type == "URL_UNVERIFIED" and e.data == "http://127.0.0.33:8889/"]) assert 1 == len([e for e in _graph_output_events if e.type == "IP_ADDRESS" and e.data == "127.0.0.33"]) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8888"]) - assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889" and e.internal == False and e.scope_distance == 0]) + assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.222:8889"]) assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8888"]) - assert 1 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889" and e.internal == False and e.scope_distance == 0]) + assert 0 == len([e for e in _graph_output_events if e.type == "OPEN_TCP_PORT" and e.data == "127.0.0.33:8889"]) assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.data == "http://127.0.0.222:8889/" and e.internal == False and e.scope_distance == 0]) assert 0 == len([e for e in _graph_output_events if e.type == "HTTP_RESPONSE" and e.data["input"] == "127.0.0.222:8889"]) assert 1 == len([e for e in _graph_output_events if e.type == "URL" and e.data == "http://127.0.0.33:8889/" and e.internal == False and e.scope_distance == 0]) diff --git a/bbot/test/test_step_1/test_modules_basic.py b/bbot/test/test_step_1/test_modules_basic.py index b3b30f2ae..76c2373db 100644 --- a/bbot/test/test_step_1/test_modules_basic.py +++ b/bbot/test/test_step_1/test_modules_basic.py @@ -396,6 +396,7 @@ async def handle_event(self, event): "SCAN": 1, "DNS_NAME": 3, "URL": 1, + "ORG_STUB": 1, "URL_UNVERIFIED": 1, "FINDING": 1, "ORG_STUB": 1, @@ -416,11 +417,12 @@ async def handle_event(self, event): "DNS_NAME": 2, "FINDING": 1, "OPEN_TCP_PORT": 1, + "ORG_STUB": 1, "SCAN": 1, "URL": 1, "URL_UNVERIFIED": 1, } - assert dummy_stats.consumed_total == 7 + assert dummy_stats.consumed_total == 8 python_stats = scan.stats.module_stats["python"] assert python_stats.produced == {} diff --git a/bbot/test/test_step_1/test_scope.py b/bbot/test/test_step_1/test_scope.py index 7435b82af..ebd94333f 100644 --- a/bbot/test/test_step_1/test_scope.py +++ b/bbot/test/test_step_1/test_scope.py @@ -12,7 +12,7 @@ async def setup_after_prep(self, module_test): module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) def check(self, module_test, events): - assert len(events) == 6 + assert len(events) == 5 assert 1 == len( [ e @@ -24,7 +24,7 @@ def check(self, module_test, events): ] ) # we have two of these because the host module considers "always_emit" in its outgoing deduplication - assert 2 == len( + assert 1 == len( [ e for e in events diff --git a/bbot/test/test_step_2/module_tests/test_module_docker_pull.py b/bbot/test/test_step_2/module_tests/test_module_docker_pull.py index b674d2c37..36d1da80a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_docker_pull.py +++ b/bbot/test/test_step_2/module_tests/test_module_docker_pull.py @@ -439,7 +439,7 @@ def check(self, module_test, events): or "blacklanternsecurity_testimage_latest.tar" in e.data["path"] ) and "docker" in e.tags - and e.scope_distance == 2 + and e.scope_distance == 1 ] assert 2 == len(filesystem_events), "Failed to download docker images" filesystem_event = filesystem_events[0] diff --git a/bbot/test/test_step_2/module_tests/test_module_github_codesearch.py b/bbot/test/test_step_2/module_tests/test_module_github_codesearch.py index 80693192b..ad3c5eae7 100644 --- a/bbot/test/test_step_2/module_tests/test_module_github_codesearch.py +++ b/bbot/test/test_step_2/module_tests/test_module_github_codesearch.py @@ -5,7 +5,7 @@ class TestGithub_Codesearch(ModuleTestBase): config_overrides = { "modules": {"github_codesearch": {"api_key": "asdf", "limit": 1}}, "omit_event_types": [], - "scope": {"report_distance": 1}, + "scope": {"report_distance": 2}, } modules_overrides = ["github_codesearch", "httpx", "secretsdb"] @@ -57,7 +57,7 @@ def check(self, module_test, events): [ e for e in events - if e.type == "URL_UNVERIFIED" and e.data == self.github_file_url and e.scope_distance == 1 + if e.type == "URL_UNVERIFIED" and e.data == self.github_file_url and e.scope_distance == 2 ] ), "Failed to emit URL_UNVERIFIED" assert 1 == len( @@ -71,13 +71,13 @@ def check(self, module_test, events): ] ), "Failed to emit CODE_REPOSITORY" assert 1 == len( - [e for e in events if e.type == "URL" and e.data == self.github_file_url and e.scope_distance == 1] + [e for e in events if e.type == "URL" and e.data == self.github_file_url and e.scope_distance == 2] ), "Failed to visit URL" assert 1 == len( [ e for e in events - if e.type == "HTTP_RESPONSE" and e.data["url"] == self.github_file_url and e.scope_distance == 1 + if e.type == "HTTP_RESPONSE" and e.data["url"] == self.github_file_url and e.scope_distance == 2 ] ), "Failed to visit URL" assert [ diff --git a/bbot/test/test_step_2/module_tests/test_module_github_org.py b/bbot/test/test_step_2/module_tests/test_module_github_org.py index b75d51238..a4313d182 100644 --- a/bbot/test/test_step_2/module_tests/test_module_github_org.py +++ b/bbot/test/test_step_2/module_tests/test_module_github_org.py @@ -284,7 +284,7 @@ async def setup_before_prep(self, module_test): ) def check(self, module_test, events): - assert len(events) == 7 + assert len(events) == 6 assert 1 == len( [ e @@ -293,7 +293,7 @@ def check(self, module_test, events): ] ), "Failed to emit target DNS_NAME" assert 1 == len( - [e for e in events if e.type == "ORG_STUB" and e.data == "blacklanternsecurity" and e.scope_distance == 1] + [e for e in events if e.type == "ORG_STUB" and e.data == "blacklanternsecurity" and e.scope_distance == 0] ), "Failed to find ORG_STUB" assert 1 == len( [ @@ -307,17 +307,6 @@ def check(self, module_test, events): and e.scope_distance == 1 ] ), "Failed to find blacklanternsecurity github" - assert 1 == len( - [ - e - for e in events - if e.type == "SOCIAL" - and e.data["platform"] == "github" - and e.data["profile_name"] == "blacklanternsecurity" - and str(e.module) == "social" - and e.scope_distance == 1 - ] - ), "Failed to find blacklanternsecurity github (social module)" assert 1 == len( [ e @@ -346,7 +335,7 @@ class TestGithub_Org_No_Members(TestGithub_Org): config_overrides = {"modules": {"github_org": {"include_members": False}}} def check(self, module_test, events): - assert len(events) == 6 + assert len(events) == 5 assert 1 == len( [ e @@ -359,17 +348,6 @@ def check(self, module_test, events): and e.scope_distance == 1 ] ), "Failed to find blacklanternsecurity github" - assert 1 == len( - [ - e - for e in events - if e.type == "SOCIAL" - and e.data["platform"] == "github" - and e.data["profile_name"] == "blacklanternsecurity" - and str(e.module) == "social" - and e.scope_distance == 1 - ] - ), "Failed to find blacklanternsecurity github (social module)" assert 0 == len( [ e @@ -385,7 +363,7 @@ class TestGithub_Org_MemberRepos(TestGithub_Org): config_overrides = {"modules": {"github_org": {"include_member_repos": True}}} def check(self, module_test, events): - assert len(events) == 8 + assert len(events) == 7 assert 1 == len( [ e @@ -403,21 +381,9 @@ class TestGithub_Org_Custom_Target(TestGithub_Org): config_overrides = {"scope": {"report_distance": 10}, "omit_event_types": [], "speculate": True} def check(self, module_test, events): - assert len(events) == 8 - assert 1 == len( - [e for e in events if e.type == "ORG_STUB" and e.data == "blacklanternsecurity" and e.scope_distance == 1] - ) + assert len(events) == 7 assert 1 == len( - [ - e - for e in events - if e.type == "SOCIAL" - and e.data["platform"] == "github" - and e.data["profile_name"] == "blacklanternsecurity" - and e.scope_distance == 1 - and str(e.module) == "social" - and e.parent.type == "URL_UNVERIFIED" - ] + [e for e in events if e.type == "ORG_STUB" and e.data == "blacklanternsecurity" and e.scope_distance == 0] ) assert 1 == len( [ diff --git a/bbot/test/test_step_2/module_tests/test_module_github_workflows.py b/bbot/test/test_step_2/module_tests/test_module_github_workflows.py index 4cb6fff41..7d7340947 100644 --- a/bbot/test/test_step_2/module_tests/test_module_github_workflows.py +++ b/bbot/test/test_step_2/module_tests/test_module_github_workflows.py @@ -439,7 +439,7 @@ async def setup_before_prep(self, module_test): ) def check(self, module_test, events): - assert len(events) == 8 + assert len(events) == 7 assert 1 == len( [ e @@ -448,7 +448,7 @@ def check(self, module_test, events): ] ), "Failed to emit target DNS_NAME" assert 1 == len( - [e for e in events if e.type == "ORG_STUB" and e.data == "blacklanternsecurity" and e.scope_distance == 1] + [e for e in events if e.type == "ORG_STUB" and e.data == "blacklanternsecurity" and e.scope_distance == 0] ), "Failed to find ORG_STUB" assert 1 == len( [ @@ -462,18 +462,6 @@ def check(self, module_test, events): and e.scope_distance == 1 ] ), "Failed to find blacklanternsecurity github" - assert 1 == len( - [ - e - for e in events - if e.type == "SOCIAL" - and e.data["platform"] == "github" - and e.data["profile_name"] == "blacklanternsecurity" - and e.data["url"] == "https://github.com/blacklanternsecurity" - and str(e.module) == "social" - and e.scope_distance == 1 - ] - ), "Failed to find blacklanternsecurity github" assert 1 == len( [ e diff --git a/bbot/test/test_step_2/module_tests/test_module_neo4j.py b/bbot/test/test_step_2/module_tests/test_module_neo4j.py index 9db35cff7..98107481a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_neo4j.py +++ b/bbot/test/test_step_2/module_tests/test_module_neo4j.py @@ -2,6 +2,8 @@ class TestNeo4j(ModuleTestBase): + config_overrides = {"modules": {"neo4j": {"uri": "bolt://127.0.0.1:11111"}}} + async def setup_before_prep(self, module_test): # install neo4j deps_pip = module_test.preloaded["neo4j"]["deps"]["pip"] diff --git a/docs/dev/module_howto.md b/docs/dev/module_howto.md index e3a3d0cbf..ff37fd98c 100644 --- a/docs/dev/module_howto.md +++ b/docs/dev/module_howto.md @@ -55,6 +55,23 @@ After saving the module, you can run it with `-m`: bbot -t evilcorp.com -m whois ``` +### Debugging Your Module - BBOT's Colorful Log Functions + +You probably noticed the use of `self.hugesuccess()`. This function is part of BBOT's builtin logging capabilty, and it prints whatever you give it in bright green. These colorful log functions can be useful for debugging. + +**BBOT log levels**: + +- `critical`: bright red +- `hugesuccess`: bright green +- `hugewarning`: bright orange +- `hugeinfo`: bright blue +- `error`: red +- `warning`: orange +- `info`: blue +- `verbose`: grey (must use `-v` to see) +- `debug`: grey (must use `-d` to see) + + For details on how tests are written, see [Unit Tests](./tests.md). ## `handle_event()` and `emit_event()` diff --git a/docs/dev/tests.md b/docs/dev/tests.md index ebe9313b4..f5d05fcf9 100644 --- a/docs/dev/tests.md +++ b/docs/dev/tests.md @@ -74,10 +74,30 @@ class TestMyModule(ModuleTestBase): def check(self, module_test, events): # here is where we check to make sure it worked dns_names = [e.data for e in events if e.type == "DNS_NAME"] + # temporary log messages for debugging + for e in dns_names: + self.log.critical(e) assert "www.blacklanternsecurity.com" in dns_names, "failed to find subdomain #1" assert "dev.blacklanternsecurity.com" in dns_names, "failed to find subdomain #2" ``` +### Debugging a test + +Similar to debugging from within a module, you can debug from within a test using `self.log.critical()`, etc: + +```python + def check(self, module_test, events): + for e in events: + # bright red + self.log.critical(e.type) + # bright green + self.log.hugesuccess(e.data) + # bright orange + self.log.hugewarning(e.tags) + # bright blue + self.log.hugeinfo(e.parent) +``` + ### More advanced tests If you have questions about tests or need to write a more advanced test, come talk to us on [GitHub](https://github.com/blacklanternsecurity/bbot/discussions) or [Discord](https://discord.com/invite/PZqkgxu5SA).