blacklanternsecurity · TheTechromancer · Jan 12, 2024 · Jan 7, 2024 · Jan 7, 2024 · Jan 7, 2024
diff --git a/README.md b/README.md
diff --git a/bbot/core/helpers/diff.py b/bbot/core/helpers/diff.py
@@ -182,7 +182,7 @@ async def compare(
 
         different_headers = self.compare_headers(self.baseline.headers, subject_response.headers)
         if different_headers:
-            log.debug(f"headers were different, no match [{different_headers}]")
+            log.debug(f"headers were different, no match")
             diff_reasons.append("header")
 
         if self.compare_body(self.baseline_json, subject_json) == False:

diff --git a/bbot/modules/base.py b/bbot/modules/base.py
@@ -855,7 +855,7 @@ def set_error_state(self, message=None, clear_outgoing_queue=False):
             - If the module was already in an errored state, the function will not reset the error state or the queue.
         """
         if not self.errored:
-            log_msg = f"Setting error state for module {self.name}"
+            log_msg = "Setting error state"
             if message is not None:
                 log_msg += f": {message}"
             self.warning(log_msg)

diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py
@@ -13,7 +13,7 @@ class ScanManager:
     """
     Manages the modules, event queues, and overall event flow during a scan.
 
-    Simultaneously serves as a shepherd, policeman, judge, jury, and executioner for events.
+    Simultaneously serves as a policeman, judge, jury, and executioner for events.
     It is responsible for managing the incoming event queue and distributing events to modules.
 
     Attributes:

diff --git a/bbot/scripts/docs.py b/bbot/scripts/docs.py
@@ -56,6 +56,16 @@ def update_md_files(keyword, s):
         for file in md_files:
             find_replace_file(file, keyword, s)
 
+    def update_individual_module_options():
+        regex = re.compile("BBOT MODULE OPTIONS ([A-Z_]+)")
+        for file in md_files:
+            with open(file) as f:
+                content = f.read()
+            for match in regex.finditer(content):
+                module_name = match.groups()[0].lower()
+                bbot_module_options_table = module_loader.modules_options_table(modules=[module_name])
+                find_replace_file(file, f"BBOT MODULE OPTIONS {module_name.upper()}", bbot_module_options_table)
+
     # Example commands
     bbot_example_commands = []
     for title, description, command in scan_examples:
@@ -88,6 +98,7 @@ def update_md_files(keyword, s):
     bbot_module_options_table = module_loader.modules_options_table()
     assert len(bbot_module_options_table.splitlines()) > 100
     update_md_files("BBOT MODULE OPTIONS", bbot_module_options_table)
+    update_individual_module_options()
 
     # BBOT module flags
     bbot_module_flags_table = module_loader.flags_table()
@@ -103,26 +114,39 @@ def update_md_files(keyword, s):
     update_md_files("BBOT DEFAULT CONFIG", default_config_yml)
 
     # Table of Contents
+    base_url = "https://www.blacklanternsecurity.com/bbot"
+
+    def format_section(section_title, section_path):
+        path = section_path.split("index.md")[0]
+        path = path.split(".md")[0]
+        return f"- [{section_title}]({base_url}/{path})\n"
+
+    bbot_docs_toc = ""
+
+    def update_toc(section, level=0):
+        nonlocal bbot_docs_toc
+        indent = " " * 4 * level
+        if isinstance(section, dict):
+            for section_title, subsections in section.items():
+                if isinstance(subsections, str):
+                    bbot_docs_toc += f"{indent}{format_section(section_title, subsections)}"
+                else:
+                    bbot_docs_toc += f"{indent}- **{section_title}**\n"
+                    for subsection in subsections:
+                        update_toc(subsection, level=level + 1)
+
     mkdocs_yml_file = bbot_code_dir / "mkdocs.yml"
     yaml.SafeLoader.add_constructor(
         "tag:yaml.org,2002:python/name:pymdownx.superfences.fence_code_format", lambda x, y: {}
     )
-    bbot_docs_toc = ""
-    base_url = "https://www.blacklanternsecurity.com/bbot"
+
     with open(mkdocs_yml_file, "r") as f:
         mkdocs_yaml = yaml.safe_load(f)
         nav = mkdocs_yaml["nav"]
         for section in nav:
-            for section_title, subsections in section.items():
-                bbot_docs_toc += f"- **{section_title}**\n"
-                for subsection in subsections:
-                    for subsection_title, subsection_path in subsection.items():
-                        if isinstance(subsection_path, str):
-                            path = subsection_path.split("index.md")[0]
-                            path = path.split(".md")[0]
-                            bbot_docs_toc += f"    - [{subsection_title}]({base_url}/{path})\n"
+            update_toc(section)
     bbot_docs_toc = bbot_docs_toc.strip()
-    assert len(bbot_docs_toc.splitlines()) > 5
+    # assert len(bbot_docs_toc.splitlines()) == 2
     update_md_files("BBOT DOCS TOC", bbot_docs_toc)
 
 

diff --git a/docs/comparison.md b/docs/comparison.md
@@ -13,3 +13,9 @@ Thanks to BBOT's recursive nature (and its `massdns` module with its NLP-powered
 ![runtimes](https://github.com/blacklanternsecurity/bbot/assets/20261699/66cafb5f-045b-4d88-9ffa-7542b3dada4f)
 
 For a detailed analysis of this data, please see [Subdomain Enumeration Tool Face-Off](https://blog.blacklanternsecurity.com/p/subdomain-enumeration-tool-face-off-4e5)
+
+### Ebay.com (larger domain)
+
+![subdomain-stats-ebay](https://github.com/blacklanternsecurity/bbot/assets/20261699/53e07e9f-50b6-4b70-9e83-297dbfbcb436)
+
+_Note that in this benchmark, Spiderfoot crashed after ~20 minutes due to excessive memory usage. Amass never finished and had to be cancelled after 24h. All other tools finished successfully._
diff --git a/docs/how_it_works.md b/docs/how_it_works.md
@@ -1,22 +1,24 @@
-# What is it?
+# What is BBOT?
 
-BBOT is a system of modules that interchange data **recursively**. Okay, but like, **_what is it?_**
+BBOT is a system of individual modules that interchange data **recursively**. Every module (e.g. `nmap`) _consumes_ a type of data (e.g. a `DNS_NAME`) and _emits_ another kind (e.g. an `OPEN_TCP_PORT`). These bits of data, called [events](scanning/events.md), become the output of the tool, but are also redistributed to all the other modules, prompting them to dig deeper, and feeding the recursive cycle of discovery.
+
+![recursion](https://github.com/blacklanternsecurity/bbot/assets/20261699/7b2edfca-2692-463b-939b-ab9d52d2fe00)
 
 ## What It **_Isn't_**
 
-BBOT's discovery process does not have "phases", or "stages"; i.e. it does not work like this:
+It's important to understand that BBOT has a fundamentally different philosophy from most tools. Its discovery process does not have "phases", or "stages"; i.e. it does not work like this:
 
 ![how_it_doesnt_work](https://github.com/blacklanternsecurity/bbot/assets/20261699/67c4e332-f181-47e7-b884-2112bda347a4)
 
 This is a traditional OSINT process, where you start with a target and you work in stages. Each stage gets you a little more data and requires more cleaning/deduplication, until finally you reach the end. The problem with this approach is that it **misses things**. 
 
-Imagine if on the last step of this process, you discovered a new subdomain. Awesome! But wait, shouldn't you go back and check that one the same way you did the others? Shouldn't you port-scan it and SSL-mine it and so on? Maybe you're a thorough, hard-working human, and you take the time to do that. Maybe by doing that, you find another subdomain! _Sigh._ What about this time? Should you start over again for that one? You see the dilemma.
+Imagine if on the last step of this process, you discovered a new subdomain. Awesome! But shouldn't you go back and check that one the same way you did the others? Shouldn't you port-scan it, SSL-mine it, extract its web contents, and so on? Let's assume you do that, and maybe during that process you even discover another subdomain! What about this time? Should you start over again for that one? You see the dilemma.
 
 ![traditional-workflow](https://github.com/blacklanternsecurity/bbot/assets/20261699/aa7cb6ac-6f88-464a-8069-0d534cecfd2b)
 
-## What It **_Is_**
+## Recursion
 
-Instead, BBOT works recursively, treating each new individual piece of data as an opportunity to find even more. When it finds something, it feeds it back into the machine and uses it to fuel the discovery process. It continues to churn like this until there is no new data to discover.
+Recursion is at the heart of BBOT's design. Each newly-discovered piece of data is fed back into the machine, fueling the discovery process. This continues until there is no new data to discover.
 
 ![bbot-workflow](https://github.com/blacklanternsecurity/bbot/assets/20261699/1b56c472-c2c4-41b5-b711-4b7296ec7b20)
 

diff --git a/docs/modules/nuclei.md b/docs/modules/nuclei.md
@@ -2,7 +2,7 @@
 
 ## Overview
 
-BBOT's interface with the open-source vulnerability scanner [Nuclei](https://github.com/projectdiscovery/nuclei) by Project Discovery. This is one of the ways BBOT makes it possible to go from a domain name or IP all the way to confirmed vulnerabilities, in one scan. 
+BBOT integrates with [Nuclei](https://github.com/projectdiscovery/nuclei), an open-source web vulnerability scanner by Project Discovery. This is one of the ways BBOT makes it possible to go from a single target domain/IP all the way to confirmed vulnerabilities, in one scan. 
 
 ![Nuclei Killchain](https://github.com/blacklanternsecurity/bbot/assets/24899338/7174c4ba-4a6e-4596-bb89-5a0c5f5abe74)
 
@@ -13,33 +13,35 @@ BBOT's interface with the open-source vulnerability scanner [Nuclei](https://git
 
 ## Default Behavior
 
-* By default, it will scan *only directory URLs*, but it will scan with ALL templates (**BE CAREFUL!**)
-* Because it's so aggressive, its considered a **deadly** module. This means you need to use the flag **--allow-deadly** to turn it on.
+* By default, only "directory URLs" (URLs ending in a slash) will be scanned, but ALL templates will be used (**BE CAREFUL!**)
+* Because it's so aggressive, Nuclei is considered a **deadly** module. This means you need to use the flag **--allow-deadly** to turn it on.
 
 ## Configuration and Options
 
 The Nuclei module has many configuration options:
 
-| Option         | Description                                                              | Default |
-|----------------|--------------------------------------------------------------------------|---------|
-| version        | What version of Nuclei to use                                            | 2.9.9   |
-| tags           | Limit Nuclei to templates w/these tags                                   | <blank> |
-| templates      | Path to template file, or template directory                             | <blank> |
-| severity       | Filter based on severity field available in the template                 | <blank> |
-| ratelimit      | maximum number of requests to send per second                            | 150     |
-| concurrency    | maximum number of templates to be executed in parallel                   | 25      |
-| mode           | technology \| severe \| manual \| budget                                 | manual  |
-| etags          | Tags to exclude from the scan                                            | <blank> |
-| directory_only | When on, limits scan to only "directory" URLs (omit endpoints)           | True    |
-| budget         | Used in budget mode to set the number of requests which will be allotted | 1       |
-| retries        | Number of times to retry a failed request                                | 0       |
-| batch_size     | The number of targets BBOT will pass to Nuclei at a time                 | 200     |
-
-Most of these you probably will **NOT** want to change. In particular, we strongly advise against changing the version of Nuclei, as it's very likely the latest version won't work right with BBOT.
-
-We also do not recommend changing **directory_only** mode. Because BBOT is recursive, feeding Nuclei every URL can get very out-of-hand very quickly, depending on what other modules are in use.
-
-### Mode ###
+<!-- BBOT MODULE OPTIONS NUCLEI -->
+| Config Option                 | Type   | Description                                                                                                                                                                                                                                                                                                     | Default   |
+|-------------------------------|--------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------|
+| modules.nuclei.batch_size     | int    | Number of targets to send to Nuclei per batch (default 200)                                                                                                                                                                                                                                                     | 200       |
+| modules.nuclei.budget         | int    | Used in budget mode to set the number of requests which will be allotted to the nuclei scan                                                                                                                                                                                                                     | 1         |
+| modules.nuclei.concurrency    | int    | maximum number of templates to be executed in parallel (default 25)                                                                                                                                                                                                                                             | 25        |
+| modules.nuclei.directory_only | bool   | Filter out 'file' URL event (default True)                                                                                                                                                                                                                                                                      | True      |
+| modules.nuclei.etags          | str    | tags to exclude from the scan                                                                                                                                                                                                                                                                                   |           |
+| modules.nuclei.mode           | str    | manual \| technology \| severe \| budget. Technology: Only activate based on technology events that match nuclei tags (nuclei -as mode). Manual (DEFAULT): Fully manual settings. Severe: Only critical and high severity templates without intrusive. Budget: Limit Nuclei to a specified number of HTTP requests | manual    |
+| modules.nuclei.ratelimit      | int    | maximum number of requests to send per second (default 150)                                                                                                                                                                                                                                                     | 150       |
+| modules.nuclei.retries        | int    | number of times to retry a failed request (default 0)                                                                                                                                                                                                                                                           | 0         |
+| modules.nuclei.severity       | str    | Filter based on severity field available in the template.                                                                                                                                                                                                                                                       |           |
+| modules.nuclei.tags           | str    | execute a subset of templates that contain the provided tags                                                                                                                                                                                                                                                    |           |
+| modules.nuclei.templates      | str    | template or template directory paths to include in the scan                                                                                                                                                                                                                                                     |           |
+| modules.nuclei.version        | str    | nuclei version                                                                                                                                                                                                                                                                                                  | 3.0.4     |
+<!-- END BBOT MODULE OPTIONS NUCLEI -->
+
+Most of these you probably will **NOT** want to change. In particular, we advise against changing the version of Nuclei, as it's possible the latest version won't work right with BBOT.
+
+We also do not recommend changing **directory_only** mode. Disabling it will cause Nuclei to process every URL. Because BBOT is recursive, this can get very out-of-hand very quickly, depending on which other modules are in use.
+
+### Modes ###
 
 The modes with the Nuclei module are generally in place to help you limit the number of templates you are scanning with, to make your scans quicker. 
 
@@ -82,18 +84,22 @@ The **ratelimit** and **concurrency** settings default to the same defaults that
 
 ### Example Commands
 
-* Scan a SINGLE target with a basic port scan and web modules
-
-`COMMAND: bbot -f web-basic -m nmap nuclei --allow-deadly -t app.evilcorp.com`
-
-* Scanning MULTIPLE targets
-
-`bbot -f web-basic -m nmap nuclei --allow-deadly -t app1.evilcorp.com app2.evilcorp.com app3.evilcorp.com`
-
-* Scanning MULTIPLE targets while performing subdomain enumeration
-
-`bbot -f subdomain-enum web-basic -m nmap nuclei –allow-deadly -t app1.evilcorp.com app2.evilcorp.com app3.evilcorp.com`
-
-* Scanning MULTIPLE targets on a BUDGET
-
-`bbot -f subdomain-enum web-basic -m nmap nuclei –allow-deadly –c modules.nuclei.mode=Budget -t app1.evilcorp.com app2.evilcorp.com app3.evilcorp.com`
+```bash
+# Scan a SINGLE target with a basic port scan and web modules
+bbot -f web-basic -m nmap nuclei --allow-deadly -t app.evilcorp.com
+```
+
+```bash
+# Scanning MULTIPLE targets
+bbot -f web-basic -m nmap nuclei --allow-deadly -t app1.evilcorp.com app2.evilcorp.com app3.evilcorp.com
+```
+
+```bash
+# Scanning MULTIPLE targets while performing subdomain enumeration
+bbot -f subdomain-enum web-basic -m nmap nuclei --allow-deadly -t app1.evilcorp.com app2.evilcorp.com app3.evilcorp.com
+```
+
+```bash
+# Scanning MULTIPLE targets on a BUDGET
+bbot -f subdomain-enum web-basic -m nmap nuclei --allow-deadly -c modules.nuclei.mode=Budget -t app1.evilcorp.com app2.evilcorp.com app3.evilcorp.com
+```