ongoing work on developer documentation

blacklanternsecurity · Sep 16, 2023 · 1308749 · 1308749
1 parent a92ad06
commit 1308749
Show file tree

Hide file tree

Showing 9 changed files with 820 additions and 57 deletions.
diff --git a/bbot/core/helpers/modules.py b/bbot/core/helpers/modules.py
@@ -11,6 +11,14 @@
 
 
 class ModuleLoader:
+    """
+    Main class responsible for loading BBOT modules.
+
+    This class is in charge of preloading modules to determine their dependencies.
+    Once dependencies are identified, they are installed before the actual module is imported.
+    This ensures that all requisite libraries and components are available for the module to function correctly.
+    """
+
     def __init__(self):
         self._preloaded = {}
         self._preloaded_orig = None
@@ -24,8 +32,25 @@ def file_filter(self, file):
         return file.suffix.lower() == ".py" and file.stem not in ["base", "__init__"]
 
     def preload(self, module_dir):
-        """
-        Preload modules from a specified directory
+        """Preloads all modules within a directory.
+
+        This function recursively iterates through each file in the specified directory
+        and preloads each BBOT module it finds to gather its meta-information and dependencies.
+
+        Args:
+            module_dir (str or Path): Directory containing BBOT modules to be preloaded.
+
+        Returns:
+            dict: A dictionary where keys are the names of the preloaded modules and
+            values are their respective preloaded data.
+
+        Examples:
+            >>> preload("/path/to/bbot_modules/")
+            {
+                "module1": {...},
+                "module2": {...},
+                ...
+            }
         """
         module_dir = Path(module_dir)
         for module_file in list_files(module_dir, filter=self.file_filter):
@@ -50,7 +75,7 @@ def preload(self, module_dir):
                 print(f"[CRIT] Error in {module_file.name}")
                 sys.exit(1)
 
-        return self.preloaded
+        return self._preloaded
 
     def preloaded(self, type=None):
         preloaded = {}
@@ -77,6 +102,51 @@ def check_type(self, module, type):
         return self._preloaded[module]["type"] == type
 
     def preload_module(self, module_file):
+        """
+        Preloads a BBOT module to gather its meta-information and dependencies.
+
+        This function reads a BBOT module file, extracts its attributes such as
+        events watched and produced, flags, meta-information, and dependencies.
+
+        Args:
+            module_file (str): Path to the BBOT module file.
+
+        Returns:
+            dict: A dictionary containing meta-information and dependencies for the module.
+
+        Examples:
+            >>> preload_module("bbot/modules/wappalyzer.py")
+            {
+                "watched_events": [
+                    "HTTP_RESPONSE"
+                ],
+                "produced_events": [
+                    "TECHNOLOGY"
+                ],
+                "flags": [
+                    "active",
+                    "safe",
+                    "web-basic",
+                    "web-thorough"
+                ],
+                "meta": {
+                    "description": "Extract technologies from web responses"
+                },
+                "config": {},
+                "options_desc": {},
+                "hash": "d5a88dd3866c876b81939c920bf4959716e2a374",
+                "deps": {
+                    "pip": [
+                        "python-Wappalyzer~=0.3.1"
+                    ],
+                    "pip_constraints": [],
+                    "shell": [],
+                    "apt": [],
+                    "ansible": []
+                },
+                "sudo": False
+            }
+        """
         watched_events = []
         produced_events = []
         flags = []
@@ -185,6 +255,22 @@ def load_modules(self, module_names):
         return modules
 
     def load_module(self, module_name):
+        """Loads a BBOT module by its name.
+
+        Imports the module from its namespace, locates its class, and returns it.
+        Identifies modules based on the presence of `watched_events` and `produced_events` attributes.
+
+        Args:
+            module_name (str): The name of the module to load.
+
+        Returns:
+            object: The loaded module class object.
+
+        Examples:
+            >>> module = load_module("example_module")
+            >>> isinstance(module, object)
+            True
+        """
         namespace = self._preloaded[module_name]["namespace"]
         import_path = f"{namespace}.{module_name}"
         module_variables = importlib.import_module(import_path, "bbot")
@@ -208,6 +294,8 @@ def load_module(self, module_name):
     def recommend_dependencies(self, modules):
         """
         Returns a dictionary containing missing dependencies and their suggested resolutions
+
+        TODO: Needs work. This should probably be implemented by building a dependency graph.
         """
         resolve_choices = {}
         # step 1: build a dictionary containing event types and their associated modules
@@ -272,6 +360,27 @@ def add_or_create(d, k, *items):
             d[k] = set(items)
 
     def modules_table(self, modules=None, mod_type=None):
+        """Generates a table of module information.
+
+        Constructs a table to display information such as module name, type, and event details.
+
+        Args:
+            modules (list, optional): List of module names to include in the table.
+            mod_type (str, optional): Type of modules to include ('scan', 'output', 'internal').
+
+        Returns:
+            str: A formatted table string.
+
+        Examples:
+            >>> print(modules_table(["nmap"]))
+            +----------+--------+-----------------+------------------------------+-------------------------------+----------------------+-------------------+
+            | Module   | Type   | Needs API Key   | Description                  | Flags                         | Consumed Events      | Produced Events   |
+            +==========+========+=================+==============================+===============================+======================+===================+
+            | nmap     | scan   | No              | Execute port scans with nmap | active, aggressive, portscan, | DNS_NAME, IP_ADDRESS | OPEN_TCP_PORT     |
+            |          |        |                 |                              | web-thorough                  |                      |                   |
+            +----------+--------+-----------------+------------------------------+-------------------------------+----------------------+-------------------+
+        """
+
         table = []
         header = ["Module", "Type", "Needs API Key", "Description", "Flags", "Consumed Events", "Produced Events"]
         maxcolwidths = [20, 10, 5, 30, 30, 20, 20]

diff --git a/bbot/core/helpers/punycode.py b/bbot/core/helpers/punycode.py
@@ -17,7 +17,17 @@ def split_text(text):
 
 def smart_encode_punycode(text: str) -> str:
     """
-    ドメイン.テスト --> xn--eckwd4c7c.xn--zckzah
+    Encodes a given string using Punycode, while leaving non-alphanumeric segments untouched.
+
+    Args:
+        text (str): The string to be encoded.
+
+    Returns:
+        str: The Punycode encoded string.
+
+    Examples:
+        >>> smart_encode_punycode("ドメイン.テスト")
+        "xn--eckwd4c7c.xn--zckzah"
     """
     segments = split_text(text)
     result_segments = []
@@ -36,7 +46,17 @@ def smart_encode_punycode(text: str) -> str:
 
 def smart_decode_punycode(text: str) -> str:
     """
-    xn--eckwd4c7c.xn--zckzah --> ドメイン.テスト
+    Decodes a given Punycode encoded string, while leaving non-alphanumeric segments untouched.
+
+    Args:
+        text (str): The Punycode encoded string to be decoded.
+
+    Returns:
+        str: The decoded string.
+
+    Examples:
+        >>> smart_decode_punycode("xn--eckwd4c7c.xn--zckzah")
+        "ドメイン.テスト"
     """
     segments = split_text(text)
     result_segments = []

diff --git a/bbot/core/helpers/ratelimiter.py b/bbot/core/helpers/ratelimiter.py
@@ -6,6 +6,20 @@
 
 
 class RateLimiter:
+    """
+    An asynchronous rate limiter class designed to be used as a context manager.
+
+    Args:
+        rate (int): The number of allowed requests per second.
+        name (str): The name of the rate limiter, used for logging.
+
+    Examples:
+        >>> rate_limiter = RateLimiter(100, "web")
+        >>> async def rate_limited_request(url):
+        ...     async with rate_limiter:
+        ...         return await request(url)
+    """
+
     def __init__(self, rate, name):
         self.rate = rate / 10
         self.name = name

diff --git a/bbot/core/helpers/regexes.py b/bbot/core/helpers/regexes.py
@@ -19,14 +19,20 @@
 word_regex = re.compile(r"[^\d\W_]+")
 word_num_regex = re.compile(r"[^\W_]+")
 num_regex = re.compile(r"\d+")
+
 _ipv6_regex = r"[A-F0-9:]*:[A-F0-9:]*:[A-F0-9:]*"
 ipv6_regex = re.compile(_ipv6_regex, re.I)
+
 # dns names with periods
 _dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.)+[^\W_]{1,63}\.?"
+dns_name_regex = re.compile(_dns_name_regex, re.I)
 # dns names without periods
 _hostname_regex = r"(?!\w*\.\w+)\w(?:[\w-]{0,100}\w)?"
+hostname_regex = re.compile(r"^" + _hostname_regex + r"$", re.I)
+
 _email_regex = r"(?:[^\W_][\w\-\.\+]{,100})@" + _dns_name_regex
 email_regex = re.compile(_email_regex, re.I)
+
 _ptr_regex = r"(?:[0-9]{1,3}[-_\.]){3}[0-9]{1,3}"
 ptr_regex = re.compile(_ptr_regex)
 # uuid regex
@@ -50,6 +56,7 @@
 _double_slash_regex = r"/{2,}"
 double_slash_regex = re.compile(_double_slash_regex)
 
+# event type regexes, used throughout BBOT for autodetection of event types, validation, and excavation.
 event_type_regexes = OrderedDict(
     (
         (k, tuple(re.compile(r, re.I) for r in regexes))
@@ -78,9 +85,8 @@
 )
 
 event_id_regex = re.compile(r"[0-9a-f]{40}:[A-Z0-9_]+")
-dns_name_regex = re.compile(_dns_name_regex, re.I)
 scan_name_regex = re.compile(r"[a-z]{3,20}_[a-z]{3,20}")
-hostname_regex = re.compile(r"^" + _hostname_regex + r"$", re.I)
+
 
 # For use with extract_params_html helper
 input_tag_regex = re.compile(r"<input[^>]+?name=[\"\'](\w+)[\"\']")