diff --git a/bbot/core/helpers/web.py b/bbot/core/helpers/web.py index 7d801ca5af..c5ad95cff7 100644 --- a/bbot/core/helpers/web.py +++ b/bbot/core/helpers/web.py @@ -393,6 +393,18 @@ async def curl(self, *args, **kwargs): output = (await self.parent_helper.run(curl_command)).stdout return output + def is_spider_danger(self, source_event, url): + """ + Returns True if crawling the given URL would exceed the scan's configured web spider depth or distance limits. + + Todo: write tests for this + """ + url_depth = self.parent_helper.url_depth(url) + web_spider_depth = self.parent_helper.scan.config.get("web_spider_depth", 1) + spider_distance = getattr(source_event, "web_spider_distance", 0) + 1 + web_spider_distance = self.parent_helper.scan.config.get("web_spider_distance", 0) + if (url_depth > web_spider_depth) or (spider_distance > web_spider_distance): + return True + return False + user_keywords = [re.compile(r, re.I) for r in ["user", "login", "email"]] pass_keywords = [re.compile(r, re.I) for r in ["pass"]] diff --git a/bbot/modules/base.py b/bbot/modules/base.py index e0249f4cf9..f4e7074086 100644 --- a/bbot/modules/base.py +++ b/bbot/modules/base.py @@ -10,79 +10,114 @@ class BaseModule: - # Event types to watch + """The base class for all BBOT modules. + + Attributes: + watched_events (List): Event types to watch. + + produced_events (List): Event types to produce. + + meta (Dict): Metadata about the module, such as whether authentication is required and a description. + + flags (List): Flags indicating the type of module (must have at least "safe" or "aggressive" and "passive" or "active"). + + deps_pip (List): Python dependencies to install via pip. Empty list by default. + + deps_apt (List): APT package dependencies to install. Empty list by default. + + deps_shell (List): Other dependencies installed via shell commands. Uses [ansible.builtin.shell](https://docs.ansible.com/ansible/latest/collections/ansible/builtin/shell_module.html). Empty list by default. + + deps_ansible (List): Additional Ansible tasks for complex dependencies. Empty list by default. + + accept_dupes (bool): Whether to accept incoming duplicate events. Default is False. + + suppress_dupes (bool): Whether to suppress outgoing duplicate events. Default is True. + + per_host_only (bool): Limit the module to only scanning once per host. Default is False. + + scope_distance_modifier (int, None): Modifies scope distance acceptance for events. Default is 0. + ``` + None == accept all events + 2 == accept events up to and including the scan's configured search distance plus two + 1 == accept events up to and including the scan's configured search distance plus one + 0 == (DEFAULT) accept events up to and including the scan's configured search distance + -1 == accept events up to and including the scan's configured search distance minus one + -2 == accept events up to and including the scan's configured search distance minus two + ``` + + target_only (bool): Accept only the initial target event(s). Default is False. + + in_scope_only (bool): Accept only explicitly in-scope events. Default is False. + + options (Dict): Customizable options for the module, e.g., {"api_key": ""}. Empty dict by default. + + options_desc (Dict): Descriptions for options, e.g., {"api_key": "API Key"}. Empty dict by default. + + max_event_handlers (int): Maximum concurrent instances of handle_event() or handle_batch(). Default is 1. + + batch_size (int): Size of batches processed by handle_batch(). Default is 1. + + batch_wait (int): Seconds to wait before force-submitting a batch. Default is 10. + + failed_request_abort_threshold (int): Threshold for setting error state after failed HTTP requests (only takes effect when `request_with_fail_count()` is used). Default is 5.
+ + _scope_shepherding (bool): When set to false, prevents events generated by this module from being automatically marked as in-scope. Default is True. Useful for low-confidence modules like speculate and ipneighbor. + + _stats_exclude (bool): Whether to exclude this module from scan statistics. Default is False. + + _qsize (int): Outgoing queue size (0 for infinite). Default is 0. + + _priority (int): Priority level of events raised by this module, 1-5. Default is 3. + + _name (str): Module name, overridden automatically. Default is 'base'. + + _type (str): Module type, for differentiating between normal and output modules. Default is 'scan'. + """ + watched_events = [] - # Event types to produce produced_events = [] - # Module description, etc. meta = {"auth_required": False, "description": "Base module"} - # Flags, must include either "passive" or "active" flags = [] + options = {} + options_desc = {} - # python dependencies (pip install ____) deps_pip = [] - # apt dependencies (apt install ____) deps_apt = [] - # other dependences as shell commands - # uses ansible.builtin.shell (https://docs.ansible.com/ansible/latest/collections/ansible/builtin/shell_module.html) deps_shell = [] - # list of ansible tasks for when other dependency installation methods aren't enough deps_ansible = [] - # Whether to accept incoming duplicate events + accept_dupes = False - # Whether to block outgoing duplicate events suppress_dupes = True - # Limit the module to only scanning once per host. By default, defined by event.host, but can be customized by overriding per_host_only = False - - # Scope distance modifier - accept/deny events based on scope distance - # None == accept all events - # 2 == accept events up to and including the scan's configured search distance plus two - # 1 == accept events up to and including the scan's configured search distance plus one - # 0 == (DEFAULT) accept events up to and including the scan's configured search distance - # -1 == accept events up to and including the scan's configured search distance minus one - # -2 == accept events up to and including the scan's configured search distance minus two scope_distance_modifier = 0 - # Only accept the initial target event(s) target_only = False - # Only accept explicitly in-scope events (scope distance == 0) - # Use this options if your module is aggressive or if you don't want it to scale with - # the scan's search distance in_scope_only = False - # Options, e.g. {"api_key": ""} - options = {} - # Options description, e.g. {"api_key": "API Key"} - options_desc = {} - # Maximum concurrent instances of handle_event() or handle_batch() max_event_handlers = 1 - # Batch size - # If batch size > 1, override handle_batch() instead of handle_event() batch_size = 1 - # Seconds to wait before force-submitting batch batch_wait = 10 - # Use in conjunction with .request_with_fail_count() to set_error_state() after this many failed HTTP requests failed_request_abort_threshold = 5 - # When set to false, prevents events generated by this module from being automatically marked as in-scope - # Useful for low-confidence modules like speculate and ipneighbor + _scope_shepherding = True - # Exclude from scan statistics _stats_exclude = False - # outgoing queue size (0 == infinite) _qsize = 0 - # Priority of events raised by this module, 1-5, lower numbers == higher priority _priority = 3 - # Name, overridden automatically _name = "base" - # Type, for differentiating between normal modules and output modules, etc. 
_type = "scan" def __init__(self, scan): + """Initializes a module instance. + + Args: + scan: The BBOT scan object associated with this module instance. + + Attributes: + scan: The scan object associated with this module. + + errored (bool): Whether the module has errored out. Default is False. + """ self.scan = scan self.errored = False self._log = None self._incoming_event_queue = None - # seconds since we've submitted a batch self._outgoing_event_queue = None # seconds since we've submitted a batch self._last_submitted_batch = None @@ -107,66 +142,116 @@ def __init__(self, scan): self._per_host_tracker = set() async def setup(self): - """ - Perform setup functions at the beginning of the scan. - Optionally override this method. + """Asynchronously sets up the module at the beginning of the scan. + + This method can be overridden to perform any necessary setup logic. - Must return True or False based on whether the setup was successful + Returns: + bool or None: True if setup was successful. None for a soft-fail, which will produce a warning but not abort the scan. False for a hard-fail, which will abort the scan. """ return True async def handle_event(self, event): - """ - Override this method if batch_size == 1. + """Asynchronously handles incoming events that the module is configured to watch. + + This method is automatically invoked when an event that matches any in `watched_events` is encountered during a scan. Override this method to implement custom event-handling logic for your module. + + Args: + event (Event): The event object containing details about the incoming event. + + Note: + This method should be overridden if the `batch_size` attribute of the module is set to 1. + + Returns: + None """ pass def handle_batch(self, *events): - """ - Override this method if batch_size > 1. + """Handles incoming events in batches for optimized processing. + + This method is automatically called when multiple events that match any in `watched_events` are encountered and the `batch_size` attribute is set to a value greater than 1. Override this method to implement custom batch event-handling logic for your module. + + Args: + *events (Event): A variable number of Event objects to be processed in a batch. + + Note: + This method should be overridden if the `batch_size` attribute of the module is set to a value greater than 1. + + Returns: + None """ pass async def filter_event(self, event): - """ - Accept/reject events based on custom criteria + """Asynchronously filters incoming events based on custom criteria. + + Override this method for more granular control over which events are accepted by your module. This method is called automatically before `handle_event()` for each incoming event that matches any in `watched_events`. + + Args: + event (Event): The incoming Event object to be filtered. - Override this method if you need more granular control - over which events are distributed to your module + Returns: + bool or tuple: True to accept the event or False to reject it. A 2-tuple `(bool, str)` may also be returned, where the second value is a string explaining the reason for acceptance or rejection. By default, returns True to accept all events. + + Note: + This method should be overridden if the module requires custom logic for event filtering. """ return True async def finish(self): - """ - Perform final functions when scan is nearing completion + """Asynchronously performs final tasks as the scan nears completion.
- For example, if your module relies on the word cloud, you may choose to wait until - the scan is finished (and the word cloud is most complete) before running an operation. + This method can be overridden to execute any necessary finalization logic. For example, if the module relies on a word cloud, you might wait for the scan to finish to ensure the word cloud is most complete before running an operation. - Note that this method may be called multiple times, because it may raise events. - Optionally override this method. + Returns: + None by default, but can return additional data if overridden. + + Warnings: + This method may be called multiple times since it can raise events, which may re-trigger the "finish" phase of the scan. Optional to override. """ return async def report(self): - """ - Perform a final task when the scan is finished, but before cleanup happens + """Asynchronously executes a final task after the scan is complete but before cleanup. - This is useful for modules that aggregate data and raise summary events at the end of a scan + This method can be overridden to aggregate data and raise summary events at the end of the scan. + + Returns: + None by default, but can return additional data if overridden. + + Note: + This method is called only once per scan. """ return async def cleanup(self): - """ - Perform final cleanup after the scan has finished - This method is called only once, and may not raise events. - Optionally override this method. + """Asynchronously performs final cleanup operations after the scan is complete. + + This method can be overridden to implement custom cleanup logic. + + Returns: + None by default, but can return additional data if overridden. + + Note: + This method is called only once per scan and may not raise events. """ return async def require_api_key(self): - """ - Use in setup() to ensure the module is configured with an API key + """Asynchronously checks if the module is configured with a valid API key. + + This method is typically used within the setup() method to ensure that an API key is provided in the module configuration. Your module must define an 'api_key' in its config options for this method to work properly. + + Example Usage: + async def setup(self): + return await self.require_api_key() + + Returns: + Tuple (bool, str): The first element is a boolean indicating whether the API is ready to use. The second element is a string message, either indicating that the API is ready or describing the error. + + Raises: + Exception: Any exceptions raised by the self.ping() method will propagate. """ self.api_key = self.config.get("api_key", "") if self.auth_secret: @@ -180,29 +265,43 @@ async def require_api_key(self): return None, "No API key set" async def ping(self): - """ - Used in conjuction with require_api_key to ensure an API is up and responding + """Asynchronously checks the health of the configured API. - Requires the use of an assert statement. + This method is used in conjunction with require_api_key() to verify that the API is not just configured, but also responsive. This method should include an assert statement to validate the API's health, typically by making a test request to a known endpoint. - E.g.
if your API has a "/ping" endpoint, you can use it like this: - def ping(self): - r = self.request_with_fail_count(f"{self.base_url}/ping") + Example Usage: + In your implementation, if the API has a "/ping" endpoint: + async def ping(self): + r = await self.request_with_fail_count(f"{self.base_url}/ping") resp_content = getattr(r, "text", "") assert getattr(r, "status_code", 0) == 200, resp_content + + Returns: + None + + Raises: + AssertionError: If the API does not respond as expected. """ return @property def auth_secret(self): - """ - Use this to indicate whether the module has everything it needs for authentication + """Indicates if the module is properly configured for authentication. + + This read-only property should be used to check whether all necessary attributes (e.g., API keys, tokens, etc.) are configured to perform authenticated requests in the module. Commonly used in setup or initialization steps. + + Returns: + bool: True if the module is properly configured for authentication, otherwise False. """ return getattr(self, "api_key", "") def get_watched_events(self): - """ - Override if you need your watched_events to be dynamic + """Retrieve the set of events that the module is interested in observing. + + Override this method if the set of events the module should watch needs to be determined dynamically, e.g., based on configuration options or other runtime conditions. + + Returns: + set: The set of event types that this module will handle. """ if self._watched_events is None: self._watched_events = set(self.watched_events) @@ -215,7 +314,7 @@ async def _handle_batch(self): if self.batch_size <= 1: return if self.num_incoming_events > 0: - events, finish = await self.events_waiting() + events, finish = await self._events_waiting() if events and not self.errored: self.debug(f"Handling batch of {len(events):,} events") submitted = True @@ -229,6 +328,21 @@ async def _handle_batch(self): return submitted def make_event(self, *args, **kwargs): + """Create an event for the scan. + + Raises a validation error if the event could not be created, unless raise_error is set to False. + + Args: + *args: Positional arguments to be passed to the scan's make_event method. + **kwargs: Keyword arguments to be passed to the scan's make_event method. + raise_error (bool, optional): Whether to raise a validation error if the event could not be created. Defaults to False. + + Returns: + Event or None: The created event, or None if a validation error occurred and raise_error was False. + + Raises: + ValidationError: If the event could not be validated and raise_error is True. + """ raise_error = kwargs.pop("raise_error", False) try: event = self.scan.make_event(*args, **kwargs) @@ -242,6 +356,26 @@ def make_event(self, *args, **kwargs): return event def emit_event(self, *args, **kwargs): + """Emit an event to the event queue and distribute it to interested modules. + + The method first creates an event object by calling `self.make_event()` with the provided arguments. + Then, the event is queued for outgoing distribution using `self.queue_outgoing_event()`. + + Args: + *args: Positional arguments to be passed to `self.make_event()` for event creation. + **kwargs: Keyword arguments to be passed for event creation or configuration of the emit action. + ```markdown + - on_success_callback: Optional callback function to execute upon successful event emission. + - abort_if: Optional condition under which the event emission should be aborted. 
+ - quick: Optional flag to indicate whether the event should be processed quickly. + ``` + + Returns: + None + + Raises: + ValidationError: If the event cannot be validated (handled in `self.make_event()`). + """ event_kwargs = dict(kwargs) emit_kwargs = {} for o in ("on_success_callback", "abort_if", "quick"): @@ -253,16 +387,42 @@ def emit_event(self, *args, **kwargs): self.queue_outgoing_event(event, **emit_kwargs) async def emit_event_wait(self, *args, **kwargs): - """ - Same as emit_event except we wait on the outgoing queue + """Emit an event to the event queue and await until there is space in the outgoing queue. + + This method is similar to `emit_event`, but it waits until there's sufficient space in the outgoing + event queue before emitting the event. It utilizes the queue size threshold defined in `self._qsize`. + + Args: + *args: Positional arguments to be passed to `emit_event()` for event creation. + **kwargs: Keyword arguments to be passed to `emit_event()` for event creation or configuration. + + Returns: + None + + See Also: + emit_event: For emitting an event without waiting on the queue size. """ while self.outgoing_event_queue.qsize() > self._qsize: await self.helpers.sleep(0.2) return self.emit_event(*args, **kwargs) - async def events_waiting(self): + async def _events_waiting(self): """ - yields all events in queue, up to maximum batch size + Asynchronously fetches events from the incoming_event_queue, up to a specified batch size. + + Args: + None + + Returns: + tuple: A tuple containing two elements: + - events (list): A list of acceptable events from the queue. + - finish (bool): A flag indicating if a "FINISHED" event is encountered. + + Notes: + - The method pulls events from incoming_event_queue using 'get_nowait()'. + - Events go through '_event_postcheck()' for validation. + - "FINISHED" events are handled differently and the finish flag is set to True. + - If the queue is empty or the batch size is reached, the loop breaks. """ events = [] finish = False @@ -300,6 +460,27 @@ def start(self): self._tasks = [asyncio.create_task(self._worker()) for _ in range(self._max_event_handlers)] async def _setup(self): + """ + Asynchronously sets up the module by invoking its 'setup()' method. + + This method catches exceptions during setup, sets the module's error state if necessary, and determines the + status code based on the result of the setup process. + + Args: + None + + Returns: + tuple: A tuple containing the module's name, status (True for success, False for hard-fail, None for soft-fail), + and an optional status message. + + Raises: + Exception: Captured exceptions from the 'setup()' method are logged, but not propagated. + + Notes: + - The 'setup()' method can return either a simple boolean status or a tuple of status and message. + - A WordlistError exception triggers a soft-fail status. + - The debug log will contain setup status information for the module. + """ status_codes = {False: "hard-fail", None: "soft-fail", True: "success"} status = False @@ -322,6 +503,29 @@ async def _setup(self): return self.name, status, str(msg) async def _worker(self): + """ + The core worker loop for the module, responsible for handling events from the incoming event queue. + + This method is a coroutine and is run asynchronously. Multiple instances can run simultaneously based on + the 'max_event_handlers' configuration. The worker dequeues events from 'incoming_event_queue', performs + necessary prechecks, and passes the event to the appropriate handler function. 
+ + Args: + None + + Returns: + None + + Raises: + asyncio.CancelledError: If the worker is cancelled during its operation. + + Notes: + - The worker is sensitive to the 'stopping' flag of the scan. It will terminate if this flag is set. + - The worker handles backpressure by pausing when the outgoing event queue is full. + - Batch processing is supported and is activated when 'batch_size' > 1. + - Each event is subject to a post-check via '_event_postcheck()' to decide whether it should be handled. + - Special 'FINISHED' events trigger the 'finish()' method of the module. + """ async with self.scan._acatch(context=self._worker): try: while not self.scan.stopping: @@ -375,9 +579,33 @@ def max_scope_distance(self): def _event_precheck(self, event): """ - Check if an event should be accepted by the module - Used when putting an event INTO the modules' queue + Pre-checks an event to determine if it should be accepted by the module for queuing. + + This method is called when an event is about to be enqueued into the module's incoming event queue. + It applies various filters such as special signal event types, module error state, watched event types, and more + to decide whether or not the event should be enqueued. + + Args: + event (Event): The event object to check. + + Returns: + tuple: A tuple (bool, str) where the bool indicates if the event should be accepted, and the str gives the reason. + + Examples: + >>> result, reason = self._event_precheck(event) + >>> if result: + ... self.incoming_event_queue.put_nowait(event) + ... else: + ... self.debug(f"Not accepting {event} because {reason}") + + Notes: + - The method considers special signal event types like "FINISHED". + - Checks whether the module is in an error state. + - Checks if the event type matches the types this module is interested in (`watched_events`). + - Checks for events tagged as 'target' if the module has `target_only` flag set. + - Applies specific filtering based on event type and module name. """ + # special signal event types if event.type in ("FINISHED",): return True, "its type is FINISHED" @@ -409,8 +637,29 @@ async def _event_postcheck(self, event): """ - Check if an event should be accepted by the module - Used when taking an event FROM the module's queue (immediately before it's handled) + Post-checks an event to determine if it should be accepted by the module for handling. + + This method is called when an event is dequeued from the module's incoming event queue, right before it is actually processed. + It applies various filters such as scope, custom filtering logic, and per-host tracking to decide the event's fate. + + Args: + event (Event): The event object to check. + + Returns: + tuple: A tuple (bool, str) where the bool indicates if the event should be accepted, and the str gives the reason. + + Examples: + >>> async def custom_filter(event): + ... if event.data not in ["evilcorp.com"]: + ... return False, "it's not on the cool list" + ... return True + ... + >>> self.filter_event = custom_filter + >>> result, reason = await self._event_postcheck(event) + + Notes: + - Override the `filter_event` method for custom filtering logic. + - This method also maintains host-based tracking when the `per_host_only` flag is set. + - The method will also update event production stats for output modules.
""" # special exception for "FINISHED" event if event.type in ("FINISHED",): @@ -474,7 +723,22 @@ async def _cleanup(self): async def queue_event(self, event): """ - Queue (incoming) event with module + Asynchronously queues an incoming event to the module's event queue for further processing. + + The function performs an initial check to see if the event is acceptable for queuing. + If the event passes the check, it is put into the `incoming_event_queue`. + + Args: + event: The event object to be queued. + + Returns: + None: The function doesn't return anything but modifies the state of the `incoming_event_queue`. + + Examples: + >>> await self.queue_event(some_event) + + Raises: + AttributeError: If the module is not in an acceptable state to queue incoming events. """ async with self._task_counter.count("queue_event()", _log=False): if self.incoming_event_queue is False: @@ -498,7 +762,23 @@ async def queue_event(self, event): def queue_outgoing_event(self, event, **kwargs): """ - Queue (outgoing) event with module + Queues an outgoing event to the module's outgoing event queue for further processing. + + The function attempts to put the event into the `outgoing_event_queue` immediately. + If it's not possible due to the current state of the module, an AttributeError is raised, and a debug log is generated. + + Args: + event: The event object to be queued. + **kwargs: Additional keyword arguments to be associated with the event. + + Returns: + None: The function doesn't return anything but modifies the state of the `outgoing_event_queue`. + + Examples: + >>> self.queue_outgoing_event(some_outgoing_event, abort_if=lambda e: "unresolved" in e.tags) + + Raises: + AttributeError: If the module is not in an acceptable state to queue outgoing events. """ try: self.outgoing_event_queue.put_nowait((event, kwargs)) @@ -506,6 +786,26 @@ def queue_outgoing_event(self, event, **kwargs): self.debug(f"Not in an acceptable state to queue outgoing event") def set_error_state(self, message=None): + """ + Puts the module into an errored state where it cannot accept new events. Optionally logs a warning message. + + The function sets the module's `errored` attribute to True and logs a warning with the optional message. + It also clears the incoming event queue to prevent further processing and updates its status to False. + + Args: + message (str, optional): Additional message to be logged along with the warning. + + Returns: + None: The function doesn't return anything but updates the `errored` state and clears the incoming event queue. + + Examples: + >>> self.set_error_state() + >>> self.set_error_state("Failed to connect to the server") + + Notes: + - The function sets `self._incoming_event_queue` to False to prevent its further use. + - If the module was already in an errored state, the function will not reset the error state or the queue. + """ if not self.errored: log_msg = f"Setting error state for module {self.name}" if message is not None: @@ -522,8 +822,27 @@ def set_error_state(self, message=None): # if there are leftover objects in the queue, the scan will hang. self._incoming_event_queue = False - # override in the module to define different values to comprise the hash def get_per_host_hash(self, event): + """ + Computes a per-host hash value for a given event. This method may be optionally overridden in subclasses. + + The function uses the event's `host` and `port` or the parsed URL to create a string to be hashed. 
+ The hash value is used for distinguishing events related to the same host. + + Args: + event (Event): The event object containing host, port, or parsed URL information. + + Returns: + int: The hash value computed for the host. + + Examples: + >>> event = self.make_event("https://example.com:8443") + >>> self.get_per_host_hash(event) + + Notes: + - To change the behavior, override this method in your custom module. + - The hash value is dependent on the `host` and `port` or the `parsed` attribute in the event object. + """ parsed = getattr(event, "parsed", None) if parsed is None: to_hash = self.helpers.make_netloc(event.host, event.port) @@ -541,6 +860,22 @@ def helpers(self): @property def status(self): + """ + Provides the current status of the module as a dictionary. + + The dictionary contains the following keys: + - 'events': A sub-dictionary with 'incoming' and 'outgoing' keys, representing the number of events in the respective queues. + - 'tasks': The current value of the task counter. + - 'errored': A boolean value indicating if the module is in an error state. + - 'running': A boolean value indicating if the module is currently processing data. + + Returns: + dict: A dictionary containing the current status of the module. + + Examples: + >>> self.status + {'events': {'incoming': 5, 'outgoing': 2}, 'tasks': 3, 'errored': False, 'running': True} + """ status = { "events": {"incoming": self.num_incoming_events, "outgoing": self.outgoing_event_queue.qsize()}, "tasks": self._task_counter.value, @@ -551,19 +886,47 @@ def status(self): @property def running(self): - """ - Indicates whether the module is currently processing data. + """Property indicating whether the module is currently processing data. + + This property checks if the task counter (`self._task_counter.value`) is greater than zero, + indicating that there are ongoing tasks in the module. + + Returns: + bool: True if the module is currently processing data, False otherwise. """ return self._task_counter.value > 0 @property def finished(self): - """ - Indicates whether the module is finished (not running and nothing in queues) + """Property indicating whether the module has finished processing. + + This property checks three conditions to determine if the module is finished: + 1. The module is not currently running (`self.running` is False). + 2. The number of incoming events in the queue is zero or less (`self.num_incoming_events <= 0`). + 3. The number of outgoing events in the queue is zero or less (`self.outgoing_event_queue.qsize() <= 0`). + + Returns: + bool: True if the module has finished processing, False otherwise. """ return not self.running and self.num_incoming_events <= 0 and self.outgoing_event_queue.qsize() <= 0 async def request_with_fail_count(self, *args, **kwargs): + """Asynchronously perform an HTTP request while keeping track of consecutive failures. + + This function wraps the `self.helpers.request` method, incrementing a failure counter if + the request returns None. When the failure counter exceeds `self.failed_request_abort_threshold`, + the module is set to an error state. + + Args: + *args: Positional arguments to pass to `self.helpers.request`. + **kwargs: Keyword arguments to pass to `self.helpers.request`. + + Returns: + Any: The response object or None if the request failed. + + Raises: + None: Sets the module to an error state when the failure threshold is reached. 
+ """ r = await self.helpers.request(*args, **kwargs) if r is None: self._request_failures += 1 @@ -573,17 +936,16 @@ async def request_with_fail_count(self, *args, **kwargs): self.set_error_state(f"Setting error state due to {self._request_failures:,} failed HTTP requests") return r - def is_spider_danger(self, source_event, url): - url_depth = self.helpers.url_depth(url) - web_spider_depth = self.scan.config.get("web_spider_depth", 1) - spider_distance = getattr(source_event, "web_spider_distance", 0) + 1 - web_spider_distance = self.scan.config.get("web_spider_distance", 0) - if (url_depth > web_spider_depth) or (spider_distance > web_spider_distance): - return True - return False - @property def config(self): + """Property that provides easy access to the module's configuration in the scan's config. + + This property serves as a shortcut to retrieve the module-specific configuration from + `self.scan.config`. If no configuration is found for this module, an empty dictionary is returned. + + Returns: + dict: The configuration dictionary specific to this module. + """ config = self.scan.config.get("modules", {}).get(self.name, {}) if config is None: config = {} @@ -603,6 +965,19 @@ def outgoing_event_queue(self): @property def priority(self): + """ + Gets the priority level of the module as an integer. + + The priority level is constrained to be between 1 and 5, inclusive. + A lower value indicates a higher priority. + + Returns: + int: The priority level of the module, constrained between 1 and 5. + + Examples: + >>> self.priority + 3 + """ return int(max(1, min(5, self._priority))) @property @@ -624,8 +999,15 @@ def log(self): @property def memory_usage(self): - """ - Return how much memory the module is currently using in bytes + """Property that calculates the current memory usage of the module in bytes. + + This property uses the `get_size` function to estimate the memory consumption + of the module object. The depth of the object graph traversal is limited to 3 levels + to avoid performance issues. Commonly shared objects like `self.scan`, `self.helpers`, + are excluded from the calculation to prevent double-counting. + + Returns: + int: The estimated memory usage of the module in bytes. """ seen = {self.scan, self.helpers, self.log} # noqa return get_size(self, max_depth=3, seen=seen) @@ -634,6 +1016,21 @@ def __str__(self): return self.name def log_table(self, *args, **kwargs): + """Logs a table to the console and optionally writes it to a file. + + This function generates a table using `self.helpers.make_table`, then logs each line + of the table as an info-level log. If a table_name is provided, it also writes the table to a file. + + Args: + *args: Variable length argument list to be passed to `self.helpers.make_table`. + **kwargs: Arbitrary keyword arguments. If 'table_name' is specified, the table will be written to a file. + + Returns: + str: The generated table as a string. + + Examples: + >>> self.log_table(['Header1', 'Header2'], [['row1col1', 'row1col2'], ['row2col1', 'row2col2']], table_name="my_table") + """ table_name = kwargs.pop("table_name", None) table = self.helpers.make_table(*args, **kwargs) for line in table.splitlines(): @@ -647,64 +1044,208 @@ def log_table(self, *args, **kwargs): return table def stdout(self, *args, **kwargs): + """Writes log messages directly to standard output. + + This is typically reserved for output modules only, e.g. `human` or `json`. + + Args: + *args: Variable length argument list to be passed to `self.log.stdout`. 
+ **kwargs: Arbitrary keyword arguments to be passed to `self.log.stdout`. + + Examples: + >>> self.stdout("This will be printed to stdout") + """ self.log.stdout(*args, extra={"scan_id": self.scan.id}, **kwargs) def debug(self, *args, trace=False, **kwargs): + """Logs debug messages and optionally the stack trace of the most recent exception. + + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to False. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.debug("This is a debug message") + >>> self.debug("This is a debug message with a trace", trace=True) + """ self.log.debug(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() def verbose(self, *args, trace=False, **kwargs): + """Logs messages and optionally the stack trace of the most recent exception. + + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to False. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.verbose("This is a verbose message") + >>> self.verbose("This is a verbose message with a trace", trace=True) + """ self.log.verbose(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() def hugeverbose(self, *args, trace=False, **kwargs): + """Logs a whole message in emboldened white text, and optionally the stack trace of the most recent exception. + + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to False. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.hugeverbose("This is a huge verbose message") + >>> self.hugeverbose("This is a huge verbose message with a trace", trace=True) + """ self.log.hugeverbose(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() def info(self, *args, trace=False, **kwargs): + """Logs informational messages and optionally the stack trace of the most recent exception. + + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to False. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.info("This is an informational message") + >>> self.info("This is an informational message with a trace", trace=True) + """ self.log.info(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() def hugeinfo(self, *args, trace=False, **kwargs): + """Logs a whole message in emboldened blue text, and optionally the stack trace of the most recent exception. + + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to False. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.hugeinfo("This is a huge informational message") + >>> self.hugeinfo("This is a huge informational message with a trace", trace=True) + """ self.log.hugeinfo(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() def success(self, *args, trace=False, **kwargs): + """Logs a success message, and optionally the stack trace of the most recent exception. 
+ + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to False. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.success("Operation completed successfully") + >>> self.success("Operation completed with a trace", trace=True) + """ self.log.success(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() def hugesuccess(self, *args, trace=False, **kwargs): + """Logs a whole message in emboldened green text, and optionally the stack trace of the most recent exception. + + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to False. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.hugesuccess("This is a huge success message") + >>> self.hugesuccess("This is a huge success message with a trace", trace=True) + """ self.log.hugesuccess(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() def warning(self, *args, trace=True, **kwargs): + """Logs a warning message, and optionally the stack trace of the most recent exception. + + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to True. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.warning("This is a warning message") + >>> self.warning("This is a warning message with a trace", trace=False) + """ self.log.warning(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() def hugewarning(self, *args, trace=True, **kwargs): + """Logs a whole message in emboldened orange text, and optionally the stack trace of the most recent exception. + + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to True. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.hugewarning("This is a huge warning message") + >>> self.hugewarning("This is a huge warning message with a trace", trace=False) + """ self.log.hugewarning(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() def error(self, *args, trace=True, **kwargs): + """Logs an error message, and optionally the stack trace of the most recent exception. + + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to True. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.error("This is an error message") + >>> self.error("This is an error message with a trace", trace=False) + """ self.log.error(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() def trace(self): + """Logs the stack trace of the most recently caught exception. + + This method captures the type, value, and traceback of the most recent exception and logs it using the trace level. It is typically used for debugging purposes. + + Anything logged using this method will always be written to the scan's `debug.log`, even if debugging is not enabled. 
+ + Examples: + >>> try: + >>> 1 / 0 + >>> except ZeroDivisionError: + >>> self.trace() + """ e_type, e_val, e_traceback = exc_info() if e_type is not None: self.log.trace(traceback.format_exc()) def critical(self, *args, trace=True, **kwargs): + """Logs a whole message in emboldened red text, and optionally the stack trace of the most recent exception. + + Args: + *args: Variable-length argument list to pass to the logger. + trace (bool, optional): Whether to log the stack trace of the most recently caught exception. Defaults to True. + **kwargs: Arbitrary keyword arguments to pass to the logger. + + Examples: + >>> self.critical("This is a critical message") + >>> self.critical("This is a critical message with a trace", trace=False) + """ self.log.critical(*args, extra={"scan_id": self.scan.id}, **kwargs) if trace: self.trace() diff --git a/bbot/modules/gowitness.py b/bbot/modules/gowitness.py index 2ae676134d..f19c5ed49e 100644 --- a/bbot/modules/gowitness.py +++ b/bbot/modules/gowitness.py @@ -148,7 +148,7 @@ async def handle_batch(self, *events): _id = row["url_id"] source_url = self.screenshots_taken[_id] source_event = events[source_url] - if self.is_spider_danger(source_event, url): + if self.helpers.is_spider_danger(source_event, url): tags.append("spider-danger") if url and url.startswith("http"): self.emit_event(url, "URL_UNVERIFIED", source=source_event, tags=tags) diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index 720b9b96b5..51b8a4dc22 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -78,7 +78,7 @@ async def search(self, content, event, **kwargs): url_event = self.report(result, name, event, **kwargs) if url_event is not None: url_in_scope = self.excavate.scan.in_scope(url_event) - is_spider_danger = self.excavate.is_spider_danger(event, result) + is_spider_danger = self.excavate.helpers.is_spider_danger(event, result) if ( ( urls_found >= self.web_spider_links_per_page and url_in_scope diff --git a/bbot/modules/robots.py b/bbot/modules/robots.py index 48ce967097..98b114b75a 100644 --- a/bbot/modules/robots.py +++ b/bbot/modules/robots.py @@ -46,6 +46,6 @@ async def handle_event(self, event): continue tags = [] - if self.is_spider_danger(event, unverified_url): + if self.helpers.is_spider_danger(event, unverified_url): tags.append("spider-danger") self.emit_event(unverified_url, "URL_UNVERIFIED", source=event, tags=tags) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 686794ce69..69815e2e0a 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -345,7 +345,7 @@ async def async_start(self): break if "python" in self.modules: - events, finish = await self.modules["python"].events_waiting() + events, finish = await self.modules["python"]._events_waiting() for e in events: yield e diff --git a/docs/dev/scanner.md b/docs/dev/scanner.md index f5388688c0..a03de4e4bb 100644 --- a/docs/dev/scanner.md +++ b/docs/dev/scanner.md @@ -1,3 +1 @@ -# `bbot.scanner.Scanner()` - ::: bbot.scanner.Scanner diff --git a/mkdocs.yml b/mkdocs.yml index 3b5118b30e..80dcdef6dc 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -32,6 +32,8 @@ nav: - How to Write a Module: contribution.md - Developer Reference: - Scanner: dev/scanner.md + - Target: dev/target.md + - BaseModule: dev/basemodule.md - Helpers: # dev/helpers/index.md - Miscellaneous: dev/helpers/misc.md @@ -60,6 +62,7 @@ plugins: handlers: python: options: + heading_level: 1 show_signature_annotations: true show_root_toc_entry: false 
show_root_heading: true
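
Taken together, the relocated `is_spider_danger` helper and the newly documented `BaseModule` lifecycle imply the following usage pattern for module authors. This is a minimal sketch, not part of the diff: the class name, event types, and description are hypothetical, while the `self.helpers.is_spider_danger()` and `self.emit_event()` calls mirror the robots and gowitness hunks above.

```python
# Minimal sketch of a module written against the documented BaseModule API.
# The class name and metadata here are illustrative, not a real BBOT module.
from bbot.modules.base import BaseModule


class example_module(BaseModule):
    watched_events = ["URL_UNVERIFIED"]
    produced_events = ["URL_UNVERIFIED"]
    flags = ["passive", "safe"]
    meta = {"auth_required": False, "description": "Example module (hypothetical)"}

    async def setup(self):
        # True == success; None == soft-fail (warning); False == hard-fail (abort)
        return True

    async def filter_event(self, event):
        # Returning a (bool, reason) tuple documents why an event was rejected
        if event.host is None:
            return False, "event has no host"
        return True

    async def handle_event(self, event):
        tags = []
        # After this refactor, spider checks go through the web helper
        # (self.helpers.is_spider_danger) instead of a BaseModule method
        if self.helpers.is_spider_danger(event, event.data):
            tags.append("spider-danger")
        self.emit_event(event.data, "URL_UNVERIFIED", source=event, tags=tags)
```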
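
Likewise, the `require_api_key()` and `ping()` docstrings added above suggest this setup pattern for API-backed modules. Another hedged sketch: the `base_url` and the `/ping` endpoint are assumptions borrowed from the docstring's own example, not a real API.

```python
# Sketch of the API-key pattern described in the require_api_key()/ping()
# docstrings. base_url and the /ping endpoint are hypothetical.
from bbot.modules.base import BaseModule


class example_api_module(BaseModule):
    watched_events = ["DNS_NAME"]
    produced_events = ["DNS_NAME"]
    flags = ["passive", "safe"]
    meta = {"auth_required": True, "description": "Example API module (hypothetical)"}
    options = {"api_key": ""}
    options_desc = {"api_key": "API Key"}

    base_url = "https://api.example.com"

    async def setup(self):
        # require_api_key() returns (status, message); a missing key yields
        # (None, "No API key set"), which soft-fails setup with a warning
        return await self.require_api_key()

    async def ping(self):
        # Called by require_api_key() to verify the API is actually responsive;
        # the assert is required, per the ping() docstring
        r = await self.request_with_fail_count(f"{self.base_url}/ping")
        assert getattr(r, "status_code", 0) == 200, getattr(r, "text", "")
```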