diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 32d6f7a3a1..1ada2b3b07 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1352,18 +1352,22 @@ def sanitize_data(self, data): self.parsed_url = self.validators.validate_url_parsed(url) data["url"] = self.parsed_url.geturl() - header_dict = {} - for i in data.get("raw_header", "").splitlines(): - if len(i) > 0 and ":" in i: - k, v = i.split(":", 1) - k = k.strip().lower() - v = v.lstrip() - if k in header_dict: - header_dict[k].append(v) - else: - header_dict[k] = [v] + if not "raw_header" in data: + raise ValueError("raw_header is required for HTTP_RESPONSE events") + + if "header-dict" not in data: + header_dict = {} + for i in data.get("raw_header", "").splitlines(): + if len(i) > 0 and ":" in i: + k, v = i.split(":", 1) + k = k.strip().lower() + v = v.lstrip() + if k in header_dict: + header_dict[k].append(v) + else: + header_dict[k] = [v] + data["header-dict"] = header_dict - data["header-dict"] = header_dict # move URL to the front of the dictionary for visibility data = dict(data) new_data = {"url": data.pop("url")} @@ -1377,6 +1381,13 @@ def _words(self): def _pretty_string(self): return f'{self.data["hash"]["header_mmh3"]}:{self.data["hash"]["body_mmh3"]}' + @property + def raw_response(self): + """ + Formats the status code, headers, and body into a single string formatted as an HTTP/1.1 response. + """ + return f'{self.data["raw_header"]}{self.data["body"]}' + @property def http_status(self): try: diff --git a/bbot/modules/dnsbrute_mutations.py b/bbot/modules/dnsbrute_mutations.py index ef0b7a0337..1d063d1e73 100644 --- a/bbot/modules/dnsbrute_mutations.py +++ b/bbot/modules/dnsbrute_mutations.py @@ -1,3 +1,5 @@ +import time + from bbot.modules.base import BaseModule @@ -40,8 +42,11 @@ async def handle_event(self, event): except KeyError: self.found[domain] = {subdomain} - def get_parent_event(self, subdomain): - parent_host = self.helpers.closest_match(subdomain, self.parent_events) + async def get_parent_event(self, subdomain): + start = time.time() + parent_host = await self.helpers.run_in_executor(self.helpers.closest_match, subdomain, self.parent_events) + elapsed = time.time() - start + self.trace(f"{subdomain}: got closest match among {len(self.parent_events):,} parent events in {elapsed:.2f}s") return self.parent_events[parent_host] async def finish(self): diff --git a/bbot/modules/secretsdb.py b/bbot/modules/secretsdb.py deleted file mode 100644 index 2d70e538d2..0000000000 --- a/bbot/modules/secretsdb.py +++ /dev/null @@ -1,78 +0,0 @@ -import re -import yaml - -from .base import BaseModule - - -class secretsdb(BaseModule): - watched_events = ["HTTP_RESPONSE"] - produced_events = ["FINDING"] - flags = ["active", "safe", "web-basic"] - meta = { - "description": "Detect common secrets with secrets-patterns-db", - "created_date": "2023-03-17", - "author": "@TheTechromancer", - } - options = { - "min_confidence": 99, - "signatures": "https://raw.githubusercontent.com/blacklanternsecurity/secrets-patterns-db/master/db/rules-stable.yml", - } - options_desc = { - "min_confidence": "Only use signatures with this confidence score or higher", - "signatures": "File path or URL to YAML signatures", - } - deps_pip = ["pyyaml~=6.0"] - # accept any HTTP_RESPONSE including out-of-scope ones (such as from github_codesearch) - scope_distance_modifier = 3 - - async def setup(self): - self.rules = [] - self.min_confidence = self.config.get("min_confidence", 99) - self.sig_file = await self.helpers.wordlist(self.config.get("signatures", "")) - with open(self.sig_file) as f: - rules_yaml = yaml.safe_load(f).get("patterns", []) - for r in rules_yaml: - r = r.get("pattern", {}) - if not r: - continue - name = r.get("name", "").lower() - confidence = r.get("confidence", "") - if name and confidence >= self.min_confidence: - regex = r.get("regex", "") - try: - compiled_regex = re.compile(regex) - r["regex"] = compiled_regex - self.rules.append(r) - except Exception: - self.debug(f"Error compiling regex: r'{regex}'") - return True - - async def handle_event(self, event): - resp_body = event.data.get("body", "") - resp_headers = event.data.get("raw_header", "") - all_matches = await self.helpers.run_in_executor(self.search_data, resp_body, resp_headers) - for matches, name in all_matches: - matches = [m.string[m.start() : m.end()] for m in matches] - description = f"Possible secret ({name}): {matches}" - event_data = {"host": str(event.host), "description": description} - parsed_url = getattr(event, "parsed_url", None) - if parsed_url: - event_data["url"] = parsed_url.geturl() - await self.emit_event( - event_data, - "FINDING", - parent=event, - context=f"{{module}} searched HTTP response and found {{event.type}}: {description}", - ) - - def search_data(self, resp_body, resp_headers): - all_matches = [] - for r in self.rules: - regex = r["regex"] - name = r["name"] - for text in (resp_body, resp_headers): - if text: - matches = list(regex.finditer(text)) - if matches: - all_matches.append((matches, name)) - return all_matches diff --git a/bbot/modules/trufflehog.py b/bbot/modules/trufflehog.py index 8441c73648..f3508f4578 100644 --- a/bbot/modules/trufflehog.py +++ b/bbot/modules/trufflehog.py @@ -3,7 +3,7 @@ class trufflehog(BaseModule): - watched_events = ["CODE_REPOSITORY", "FILESYSTEM"] + watched_events = ["CODE_REPOSITORY", "FILESYSTEM", "HTTP_RESPONSE"] produced_events = ["FINDING", "VULNERABILITY"] flags = ["passive", "safe", "code-enum"] meta = { @@ -86,7 +86,7 @@ async def handle_event(self, event): path = event.data["url"] if "git" in event.tags: module = "github-experimental" - else: + elif event.type == "FILESYSTEM": path = event.data["path"] if "git" in event.tags: module = "git" @@ -96,6 +96,10 @@ async def handle_event(self, event): module = "postman" else: module = "filesystem" + elif event.type == "HTTP_RESPONSE": + module = "filesystem" + path = self.helpers.tempfile(event.raw_response, pipe=False) + if event.type == "CODE_REPOSITORY": host = event.host else: @@ -108,41 +112,31 @@ async def handle_event(self, event): verified, source_metadata, ) in self.execute_trufflehog(module, path): - if verified: - data = { - "severity": "High", - "description": f"Verified Secret Found. Detector Type: [{detector_name}] Decoder Type: [{decoder_name}] Details: [{source_metadata}]", - "host": host, - } - if description: - data["description"] += f" Description: [{description}]" - data["description"] += f" Raw result: [{raw_result}]" - if rawv2_result: - data["description"] += f" RawV2 result: [{rawv2_result}]" - await self.emit_event( - data, - "VULNERABILITY", - event, - context=f'{{module}} searched {event.type} using "{module}" method and found verified secret ({{event.type}}): {raw_result}', - ) - else: - data = { - "description": f"Potential Secret Found. Detector Type: [{detector_name}] Decoder Type: [{decoder_name}] Details: [{source_metadata}]", - "host": host, - } - if description: - data["description"] += f" Description: [{description}]" - data["description"] += f" Raw result: [{raw_result}]" - if rawv2_result: - data["description"] += f" RawV2 result: [{rawv2_result}]" - await self.emit_event( - data, - "FINDING", - event, - context=f'{{module}} searched {event.type} using "{module}" method and found possible secret ({{event.type}}): {raw_result}', - ) - - async def execute_trufflehog(self, module, path): + verified_str = "Verified" if verified else "Possible" + finding_type = "VULNERABILITY" if verified else "FINDING" + data = { + "description": f"{verified_str} Secret Found. Detector Type: [{detector_name}] Decoder Type: [{decoder_name}] Details: [{source_metadata}]", + "host": host, + } + if finding_type == "VULNERABILITY": + data["severity"] = "High" + if description: + data["description"] += f" Description: [{description}]" + data["description"] += f" Raw result: [{raw_result}]" + if rawv2_result: + data["description"] += f" RawV2 result: [{rawv2_result}]" + await self.emit_event( + data, + finding_type, + event, + context=f'{{module}} searched {event.type} using "{module}" method and found {verified_str.lower()} secret ({{event.type}}): {raw_result}', + ) + + # clean up the tempfile when we're done with it + if event.type == "HTTP_RESPONSE": + path.unlink(missing_ok=True) + + async def execute_trufflehog(self, module, path=None, string=None): command = [ "trufflehog", "--json", diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 195f08ea89..78a01d7923 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -149,6 +149,7 @@ async def test_events(events, helpers): "title": "HTTP%20RESPONSE", "url": "http://www.evilcorp.com:80", "input": "http://www.evilcorp.com:80", + "raw_header": "HTTP/1.1 301 Moved Permanently\r\nLocation: http://www.evilcorp.com/asdf\r\n\r\n", "location": "/asdf", "status_code": 301, }, @@ -161,7 +162,13 @@ async def test_events(events, helpers): # http response url validation http_response_2 = scan.make_event( - {"port": "80", "url": "http://evilcorp.com:80/asdf"}, "HTTP_RESPONSE", dummy=True + { + "port": "80", + "url": "http://evilcorp.com:80/asdf", + "raw_header": "HTTP/1.1 301 Moved Permanently\r\nLocation: http://www.evilcorp.com/asdf\r\n\r\n", + }, + "HTTP_RESPONSE", + dummy=True, ) assert http_response_2.data["url"] == "http://evilcorp.com/asdf" @@ -546,6 +553,10 @@ async def test_events(events, helpers): http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) assert http_response.parent_id == scan.root_event.id assert http_response.data["input"] == "http://example.com:80" + assert ( + http_response.raw_response + == 'HTTP/1.1 200 OK\r\nConnection: close\r\nAge: 526111\r\nCache-Control: max-age=604800\r\nContent-Type: text/html; charset=UTF-8\r\nDate: Mon, 14 Nov 2022 17:14:27 GMT\r\nEtag: "3147526947+ident+gzip"\r\nExpires: Mon, 21 Nov 2022 17:14:27 GMT\r\nLast-Modified: Thu, 17 Oct 2019 07:18:26 GMT\r\nServer: ECS (agb/A445)\r\nVary: Accept-Encoding\r\nX-Cache: HIT\r\n\r\n\n\n\n Example Domain\n\n \n \n \n \n\n\n\n
\n

Example Domain

\n

This domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.

\n

More information...

\n
\n\n\n' + ) json_event = http_response.json(mode="graph") assert isinstance(json_event["data"], str) json_event = http_response.json() @@ -906,7 +917,12 @@ def test_event_closest_host(): assert event1.host == "evilcorp.com" # second event has a host + url event2 = scan.make_event( - {"method": "GET", "url": "http://www.evilcorp.com/asdf", "hash": {"header_mmh3": "1", "body_mmh3": "2"}}, + { + "method": "GET", + "url": "http://www.evilcorp.com/asdf", + "hash": {"header_mmh3": "1", "body_mmh3": "2"}, + "raw_header": "HTTP/1.1 301 Moved Permanently\r\nLocation: http://www.evilcorp.com/asdf\r\n\r\n", + }, "HTTP_RESPONSE", parent=event1, ) diff --git a/bbot/test/test_step_2/module_tests/test_module_github_codesearch.py b/bbot/test/test_step_2/module_tests/test_module_github_codesearch.py index 03c519a8cf..7ede43b7e9 100644 --- a/bbot/test/test_step_2/module_tests/test_module_github_codesearch.py +++ b/bbot/test/test_step_2/module_tests/test_module_github_codesearch.py @@ -3,17 +3,35 @@ class TestGithub_Codesearch(ModuleTestBase): config_overrides = { - "modules": {"github_codesearch": {"api_key": "asdf", "limit": 1}}, + "modules": { + "github_codesearch": {"api_key": "asdf", "limit": 1}, + "trufflehog": {"only_verified": False}, + }, "omit_event_types": [], "scope": {"report_distance": 2}, } - modules_overrides = ["github_codesearch", "httpx", "secretsdb"] + modules_overrides = ["github_codesearch", "httpx", "trufflehog"] github_file_endpoint = ( "/projectdiscovery/nuclei/06f242e5fce3439b7418877676810cbf57934875/v2/cmd/cve-annotate/main.go" ) github_file_url = f"http://127.0.0.1:8888{github_file_endpoint}" - github_file_content = "-----BEGIN PGP PRIVATE KEY BLOCK-----" + github_file_content = """-----BEGIN PRIVATE KEY----- +MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBAOBY2pd9PSQvuxqu +WXFNVgILTWuUc721Wc2sFNvp4beowhUe1lfxaq5ZfCJcz7z4QsqFhOeks69O9UIb +oiOTDocPDog9PHO8yZXopHm0StFZvSjjKSNuFvy/WopPTGpxUZ5boCaF1CXumY7W +FL+jIap5faimLL9prIwaQKBwv80lAgMBAAECgYEAxvpHtgCgD849tqZYMgOTevCn +U/kwxltoMOClB39icNA+gxj8prc6FTTMwnVq0oGmS5UskX8k1yHCqUV1AvRU9o+q +I8L8a3F3TQKQieI/YjiUNK8A87bKkaiN65ooOnhT+I3ZjZMPR5YEyycimMp22jsv +LyX/35J/wf1rNiBs/YECQQDvtxgmMhE+PeajXqw1w2C3Jds27hI3RPDnamEyWr/L +KkSplbKTF6FuFDYOFdJNPrfxm1tx2MZ2cBfs+h/GnCJVAkEA75Z9w7q8obbqGBHW +9bpuFvLjW7bbqO7HBuXYX9zQcZL6GSArFP0ba5lhgH1qsVQfxVWVyiV9/chme7xc +ljfvkQJBAJ7MpSPQcRnRefNp6R0ok+5gFqt55PlWI1y6XS81bO7Szm+laooE0n0Q +yIpmLE3dqY9VgquVlkupkD/9poU0s40CQD118ZVAVht1/N9n1Cj9RjiE3mYspnTT +rCLM25Db6Gz6M0Y2xlaAB4S2uBhqE/Chj/TjW6WbsJJl0kRzsZynhMECQFYKiM1C +T4LB26ynW00VE8z4tEWSoYt4/Vn/5wFhalVjzoSJ8Hm2qZiObRYLQ1m0X4KnkShk +Gnl54dJHT+EhlfY= +-----END PRIVATE KEY-----""" async def setup_before_prep(self, module_test): expect_args = {"method": "GET", "uri": self.github_file_endpoint} @@ -82,5 +100,5 @@ def check(self, module_test, events): ] ), "Failed to visit URL" assert [ - e for e in events if e.type == "FINDING" and str(e.module) == "secretsdb" + e for e in events if e.type == "FINDING" and str(e.module) == "trufflehog" ], "Failed to find secret in repo file" diff --git a/bbot/test/test_step_2/module_tests/test_module_secretsdb.py b/bbot/test/test_step_2/module_tests/test_module_secretsdb.py deleted file mode 100644 index f735035bcc..0000000000 --- a/bbot/test/test_step_2/module_tests/test_module_secretsdb.py +++ /dev/null @@ -1,14 +0,0 @@ -from .base import ModuleTestBase - - -class TestSecretsDB(ModuleTestBase): - targets = ["http://127.0.0.1:8888"] - modules_overrides = ["httpx", "secretsdb"] - - async def setup_before_prep(self, module_test): - expect_args = {"method": "GET", "uri": "/"} - respond_args = {"response_data": "-----BEGIN PGP PRIVATE KEY BLOCK-----"} - module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) - - def check(self, module_test, events): - assert any(e.type == "FINDING" for e in events) diff --git a/bbot/test/test_step_2/module_tests/test_module_trufflehog.py b/bbot/test/test_step_2/module_tests/test_module_trufflehog.py index ba923bc08a..fdda67069c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_trufflehog.py +++ b/bbot/test/test_step_2/module_tests/test_module_trufflehog.py @@ -1240,3 +1240,17 @@ def check(self, module_test, events): and Path(e.data["path"]).is_file() ] ), "Failed to find blacklanternsecurity postman workspace" + + +class TestTrufflehog_HTTPResponse(ModuleTestBase): + targets = ["http://127.0.0.1:8888"] + modules_overrides = ["httpx", "trufflehog"] + config_overrides = {"modules": {"trufflehog": {"only_verified": False}}} + + async def setup_before_prep(self, module_test): + expect_args = {"method": "GET", "uri": "/"} + respond_args = {"response_data": "https://admin:admin@internal.host.com"} + module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) + + def check(self, module_test, events): + assert any(e.type == "FINDING" for e in events) diff --git a/bbot/test/test_step_2/module_tests/test_module_web_report.py b/bbot/test/test_step_2/module_tests/test_module_web_report.py index c34eef00f2..d29f29a42b 100644 --- a/bbot/test/test_step_2/module_tests/test_module_web_report.py +++ b/bbot/test/test_step_2/module_tests/test_module_web_report.py @@ -3,10 +3,11 @@ class TestWebReport(ModuleTestBase): targets = ["http://127.0.0.1:8888"] - modules_overrides = ["httpx", "wappalyzer", "badsecrets", "web_report", "secretsdb"] + modules_overrides = ["httpx", "wappalyzer", "badsecrets", "web_report", "trufflehog"] + config_overrides = {"modules": {"trufflehog": {"only_verified": False}}} async def setup_before_prep(self, module_test): - # secretsdb --> FINDING + # trufflehog --> FINDING # wappalyzer --> TECHNOLOGY # badsecrets --> VULNERABILITY respond_args = {"response_data": web_body} @@ -45,7 +46,22 @@ def check(self, module_test, events): -

-----BEGIN PGP PRIVATE KEY BLOCK-----

+

-----BEGIN PRIVATE KEY----- +MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBAOBY2pd9PSQvuxqu +WXFNVgILTWuUc721Wc2sFNvp4beowhUe1lfxaq5ZfCJcz7z4QsqFhOeks69O9UIb +oiOTDocPDog9PHO8yZXopHm0StFZvSjjKSNuFvy/WopPTGpxUZ5boCaF1CXumY7W +FL+jIap5faimLL9prIwaQKBwv80lAgMBAAECgYEAxvpHtgCgD849tqZYMgOTevCn +U/kwxltoMOClB39icNA+gxj8prc6FTTMwnVq0oGmS5UskX8k1yHCqUV1AvRU9o+q +I8L8a3F3TQKQieI/YjiUNK8A87bKkaiN65ooOnhT+I3ZjZMPR5YEyycimMp22jsv +LyX/35J/wf1rNiBs/YECQQDvtxgmMhE+PeajXqw1w2C3Jds27hI3RPDnamEyWr/L +KkSplbKTF6FuFDYOFdJNPrfxm1tx2MZ2cBfs+h/GnCJVAkEA75Z9w7q8obbqGBHW +9bpuFvLjW7bbqO7HBuXYX9zQcZL6GSArFP0ba5lhgH1qsVQfxVWVyiV9/chme7xc +ljfvkQJBAJ7MpSPQcRnRefNp6R0ok+5gFqt55PlWI1y6XS81bO7Szm+laooE0n0Q +yIpmLE3dqY9VgquVlkupkD/9poU0s40CQD118ZVAVht1/N9n1Cj9RjiE3mYspnTT +rCLM25Db6Gz6M0Y2xlaAB4S2uBhqE/Chj/TjW6WbsJJl0kRzsZynhMECQFYKiM1C +T4LB26ynW00VE8z4tEWSoYt4/Vn/5wFhalVjzoSJ8Hm2qZiObRYLQ1m0X4KnkShk +Gnl54dJHT+EhlfY= +-----END PRIVATE KEY-----

"""