diff --git a/bbot/modules/internal/speculate.py b/bbot/modules/internal/speculate.py index afb89b1492..0664f954a8 100644 --- a/bbot/modules/internal/speculate.py +++ b/bbot/modules/internal/speculate.py @@ -1,6 +1,5 @@ import random import ipaddress -from pathlib import Path from bbot.core.helpers import validators from bbot.modules.internal.base import BaseInternalModule @@ -24,9 +23,8 @@ class speculate(BaseInternalModule): "SOCIAL", "AZURE_TENANT", "USERNAME", - "FILESYSTEM", ] - produced_events = ["DNS_NAME", "OPEN_TCP_PORT", "IP_ADDRESS", "FINDING", "ORG_STUB", "FILESYSTEM"] + produced_events = ["DNS_NAME", "OPEN_TCP_PORT", "IP_ADDRESS", "FINDING", "ORG_STUB"] flags = ["passive"] meta = { "description": "Derive certain event types from others by common sense", @@ -34,11 +32,10 @@ class speculate(BaseInternalModule): "author": "@liquidsec", } - options = {"max_hosts": 65536, "ports": "80,443", "ignore_folders": [".git"]} + options = {"max_hosts": 65536, "ports": "80,443"} options_desc = { "max_hosts": "Max number of IP_RANGE hosts to convert into IP_ADDRESS events", "ports": "The set of ports to speculate on", - "ignore_folders": "Subfolders to ignore when crawling downloaded folders", } scope_distance_modifier = 1 _priority = 4 @@ -75,13 +72,6 @@ async def setup(self): self.hugewarning(f'Enabling the "portscan" module is highly recommended') self.range_to_ip = False - self.ignored_folders = self.config.get("ignore_folders", []) - - return True - - async def filter_event(self, event): - if event.type == "FILESYSTEM" and "folder" not in event.tags: - return False, "Event is not a folder" return True async def handle_event(self, event): @@ -205,14 +195,3 @@ async def handle_event(self, event): email_event = self.make_event(email, "EMAIL_ADDRESS", parent=event, tags=["affiliate"]) if email_event: await self.emit_event(email_event, context="detected {event.type}: {event.data}") - - # FILESYSTEM (folder) --> FILESYSTEM (files) - if event.type == "FILESYSTEM": - 
folder_path = Path(event.data["path"]) - for file_path in folder_path.rglob("*"): - # If the file is not in an ignored folder and if it has an allowed extension raise it as a FILESYSTEM event - if not any(ignored_folder in str(file_path) for ignored_folder in self.ignored_folders): - file_event = self.make_event( - {"path": str(file_path)}, "FILESYSTEM", tags=["parsed_folder", "file"], parent=event - ) - await self.emit_event(file_event) diff --git a/bbot/test/test_step_2/module_tests/test_module_git_clone.py b/bbot/test/test_step_2/module_tests/test_module_git_clone.py index f0b91a2a15..15bc54fb37 100644 --- a/bbot/test/test_step_2/module_tests/test_module_git_clone.py +++ b/bbot/test/test_step_2/module_tests/test_module_git_clone.py @@ -215,7 +215,7 @@ class TestGit_CloneWithBlob(TestGit_Clone): config_overrides = {"folder_blobs": True} def check(self, module_test, events): - filesystem_events = [e for e in events if e.type == "FILESYSTEM" and "folder" in e.tags] + filesystem_events = [e for e in events if e.type == "FILESYSTEM"] assert len(filesystem_events) == 1 assert all(["blob" in e.data for e in filesystem_events]) filesystem_event = filesystem_events[0] diff --git a/bbot/test/test_step_2/module_tests/test_module_trufflehog.py b/bbot/test/test_step_2/module_tests/test_module_trufflehog.py index 46798dd94e..a838ad6ab8 100644 --- a/bbot/test/test_step_2/module_tests/test_module_trufflehog.py +++ b/bbot/test/test_step_2/module_tests/test_module_trufflehog.py @@ -14,6 +14,7 @@ class TestTrufflehog(ModuleTestBase): "github_org", "speculate", "git_clone", + "unstructured", "github_workflows", "dockerhub", "docker_pull", @@ -1134,7 +1135,7 @@ def check(self, module_test, events): and "Raw result: [https://admin:admin@the-internet.herokuapp.com]" in e.data["description"] and "RawV2 result: [https://admin:admin@the-internet.herokuapp.com/basic_auth]" in e.data["description"] ] - # Trufflehog should find 4 verifiable secrets, 1 from the github, 1 from the workflow 
log, 1 from the docker image and 1 from the postman. + # Trufflehog should find 4 verifiable secrets, 1 from the github, 1 from the workflow log, 1 from the docker image and 1 from the postman. Unstructured will extract the text file but trufflehog should reject it as it has already scanned the containing folder. assert 4 == len(vuln_events), "Failed to find secret in events" github_repo_event = [e for e in vuln_events if "test_keys" in e.data["description"]][0].parent folder = Path(github_repo_event.data["path"]) @@ -1196,7 +1197,7 @@ def check(self, module_test, events): and "Potential Secret Found." in e.data["description"] and "Raw result: [https://admin:admin@internal.host.com]" in e.data["description"] ] - # Trufflehog should find 4 unverifiable secrets, 1 from the github, 1 from the workflow log, 1 from the docker image and 1 from the postman. + # Trufflehog should find 4 unverifiable secrets, 1 from the github, 1 from the workflow log, 1 from the docker image and 1 from the postman. Unstructured will extract the text file but trufflehog should reject it as it has already scanned the containing folder. assert 4 == len(finding_events), "Failed to find secret in events" github_repo_event = [e for e in finding_events if "test_keys" in e.data["description"]][0].parent folder = Path(github_repo_event.data["path"])