diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 32d6f7a3a1..1ada2b3b07 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1352,18 +1352,22 @@ def sanitize_data(self, data): self.parsed_url = self.validators.validate_url_parsed(url) data["url"] = self.parsed_url.geturl() - header_dict = {} - for i in data.get("raw_header", "").splitlines(): - if len(i) > 0 and ":" in i: - k, v = i.split(":", 1) - k = k.strip().lower() - v = v.lstrip() - if k in header_dict: - header_dict[k].append(v) - else: - header_dict[k] = [v] + if not "raw_header" in data: + raise ValueError("raw_header is required for HTTP_RESPONSE events") + + if "header-dict" not in data: + header_dict = {} + for i in data.get("raw_header", "").splitlines(): + if len(i) > 0 and ":" in i: + k, v = i.split(":", 1) + k = k.strip().lower() + v = v.lstrip() + if k in header_dict: + header_dict[k].append(v) + else: + header_dict[k] = [v] + data["header-dict"] = header_dict - data["header-dict"] = header_dict # move URL to the front of the dictionary for visibility data = dict(data) new_data = {"url": data.pop("url")} @@ -1377,6 +1381,13 @@ def _words(self): def _pretty_string(self): return f'{self.data["hash"]["header_mmh3"]}:{self.data["hash"]["body_mmh3"]}' + @property + def raw_response(self): + """ + Formats the status code, headers, and body into a single string formatted as an HTTP/1.1 response. + """ + return f'{self.data["raw_header"]}{self.data["body"]}' + @property def http_status(self): try: diff --git a/bbot/modules/dnsbrute_mutations.py b/bbot/modules/dnsbrute_mutations.py index ef0b7a0337..1d063d1e73 100644 --- a/bbot/modules/dnsbrute_mutations.py +++ b/bbot/modules/dnsbrute_mutations.py @@ -1,3 +1,5 @@ +import time + from bbot.modules.base import BaseModule @@ -40,8 +42,11 @@ async def handle_event(self, event): except KeyError: self.found[domain] = {subdomain} - def get_parent_event(self, subdomain): - parent_host = self.helpers.closest_match(subdomain, self.parent_events) + async def get_parent_event(self, subdomain): + start = time.time() + parent_host = await self.helpers.run_in_executor(self.helpers.closest_match, subdomain, self.parent_events) + elapsed = time.time() - start + self.trace(f"{subdomain}: got closest match among {len(self.parent_events):,} parent events in {elapsed:.2f}s") return self.parent_events[parent_host] async def finish(self): diff --git a/bbot/modules/secretsdb.py b/bbot/modules/secretsdb.py deleted file mode 100644 index 2d70e538d2..0000000000 --- a/bbot/modules/secretsdb.py +++ /dev/null @@ -1,78 +0,0 @@ -import re -import yaml - -from .base import BaseModule - - -class secretsdb(BaseModule): - watched_events = ["HTTP_RESPONSE"] - produced_events = ["FINDING"] - flags = ["active", "safe", "web-basic"] - meta = { - "description": "Detect common secrets with secrets-patterns-db", - "created_date": "2023-03-17", - "author": "@TheTechromancer", - } - options = { - "min_confidence": 99, - "signatures": "https://raw.githubusercontent.com/blacklanternsecurity/secrets-patterns-db/master/db/rules-stable.yml", - } - options_desc = { - "min_confidence": "Only use signatures with this confidence score or higher", - "signatures": "File path or URL to YAML signatures", - } - deps_pip = ["pyyaml~=6.0"] - # accept any HTTP_RESPONSE including out-of-scope ones (such as from github_codesearch) - scope_distance_modifier = 3 - - async def setup(self): - self.rules = [] - self.min_confidence = self.config.get("min_confidence", 99) - self.sig_file = await self.helpers.wordlist(self.config.get("signatures", "")) - with open(self.sig_file) as f: - rules_yaml = yaml.safe_load(f).get("patterns", []) - for r in rules_yaml: - r = r.get("pattern", {}) - if not r: - continue - name = r.get("name", "").lower() - confidence = r.get("confidence", "") - if name and confidence >= self.min_confidence: - regex = r.get("regex", "") - try: - compiled_regex = re.compile(regex) - r["regex"] = compiled_regex - self.rules.append(r) - except Exception: - self.debug(f"Error compiling regex: r'{regex}'") - return True - - async def handle_event(self, event): - resp_body = event.data.get("body", "") - resp_headers = event.data.get("raw_header", "") - all_matches = await self.helpers.run_in_executor(self.search_data, resp_body, resp_headers) - for matches, name in all_matches: - matches = [m.string[m.start() : m.end()] for m in matches] - description = f"Possible secret ({name}): {matches}" - event_data = {"host": str(event.host), "description": description} - parsed_url = getattr(event, "parsed_url", None) - if parsed_url: - event_data["url"] = parsed_url.geturl() - await self.emit_event( - event_data, - "FINDING", - parent=event, - context=f"{{module}} searched HTTP response and found {{event.type}}: {description}", - ) - - def search_data(self, resp_body, resp_headers): - all_matches = [] - for r in self.rules: - regex = r["regex"] - name = r["name"] - for text in (resp_body, resp_headers): - if text: - matches = list(regex.finditer(text)) - if matches: - all_matches.append((matches, name)) - return all_matches diff --git a/bbot/modules/trufflehog.py b/bbot/modules/trufflehog.py index 8441c73648..f3508f4578 100644 --- a/bbot/modules/trufflehog.py +++ b/bbot/modules/trufflehog.py @@ -3,7 +3,7 @@ class trufflehog(BaseModule): - watched_events = ["CODE_REPOSITORY", "FILESYSTEM"] + watched_events = ["CODE_REPOSITORY", "FILESYSTEM", "HTTP_RESPONSE"] produced_events = ["FINDING", "VULNERABILITY"] flags = ["passive", "safe", "code-enum"] meta = { @@ -86,7 +86,7 @@ async def handle_event(self, event): path = event.data["url"] if "git" in event.tags: module = "github-experimental" - else: + elif event.type == "FILESYSTEM": path = event.data["path"] if "git" in event.tags: module = "git" @@ -96,6 +96,10 @@ async def handle_event(self, event): module = "postman" else: module = "filesystem" + elif event.type == "HTTP_RESPONSE": + module = "filesystem" + path = self.helpers.tempfile(event.raw_response, pipe=False) + if event.type == "CODE_REPOSITORY": host = event.host else: @@ -108,41 +112,31 @@ async def handle_event(self, event): verified, source_metadata, ) in self.execute_trufflehog(module, path): - if verified: - data = { - "severity": "High", - "description": f"Verified Secret Found. Detector Type: [{detector_name}] Decoder Type: [{decoder_name}] Details: [{source_metadata}]", - "host": host, - } - if description: - data["description"] += f" Description: [{description}]" - data["description"] += f" Raw result: [{raw_result}]" - if rawv2_result: - data["description"] += f" RawV2 result: [{rawv2_result}]" - await self.emit_event( - data, - "VULNERABILITY", - event, - context=f'{{module}} searched {event.type} using "{module}" method and found verified secret ({{event.type}}): {raw_result}', - ) - else: - data = { - "description": f"Potential Secret Found. Detector Type: [{detector_name}] Decoder Type: [{decoder_name}] Details: [{source_metadata}]", - "host": host, - } - if description: - data["description"] += f" Description: [{description}]" - data["description"] += f" Raw result: [{raw_result}]" - if rawv2_result: - data["description"] += f" RawV2 result: [{rawv2_result}]" - await self.emit_event( - data, - "FINDING", - event, - context=f'{{module}} searched {event.type} using "{module}" method and found possible secret ({{event.type}}): {raw_result}', - ) - - async def execute_trufflehog(self, module, path): + verified_str = "Verified" if verified else "Possible" + finding_type = "VULNERABILITY" if verified else "FINDING" + data = { + "description": f"{verified_str} Secret Found. Detector Type: [{detector_name}] Decoder Type: [{decoder_name}] Details: [{source_metadata}]", + "host": host, + } + if finding_type == "VULNERABILITY": + data["severity"] = "High" + if description: + data["description"] += f" Description: [{description}]" + data["description"] += f" Raw result: [{raw_result}]" + if rawv2_result: + data["description"] += f" RawV2 result: [{rawv2_result}]" + await self.emit_event( + data, + finding_type, + event, + context=f'{{module}} searched {event.type} using "{module}" method and found {verified_str.lower()} secret ({{event.type}}): {raw_result}', + ) + + # clean up the tempfile when we're done with it + if event.type == "HTTP_RESPONSE": + path.unlink(missing_ok=True) + + async def execute_trufflehog(self, module, path=None, string=None): command = [ "trufflehog", "--json", diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 195f08ea89..78a01d7923 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -149,6 +149,7 @@ async def test_events(events, helpers): "title": "HTTP%20RESPONSE", "url": "http://www.evilcorp.com:80", "input": "http://www.evilcorp.com:80", + "raw_header": "HTTP/1.1 301 Moved Permanently\r\nLocation: http://www.evilcorp.com/asdf\r\n\r\n", "location": "/asdf", "status_code": 301, }, @@ -161,7 +162,13 @@ async def test_events(events, helpers): # http response url validation http_response_2 = scan.make_event( - {"port": "80", "url": "http://evilcorp.com:80/asdf"}, "HTTP_RESPONSE", dummy=True + { + "port": "80", + "url": "http://evilcorp.com:80/asdf", + "raw_header": "HTTP/1.1 301 Moved Permanently\r\nLocation: http://www.evilcorp.com/asdf\r\n\r\n", + }, + "HTTP_RESPONSE", + dummy=True, ) assert http_response_2.data["url"] == "http://evilcorp.com/asdf" @@ -546,6 +553,10 @@ async def test_events(events, helpers): http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) assert http_response.parent_id == scan.root_event.id assert http_response.data["input"] == "http://example.com:80" + assert ( + http_response.raw_response + == 'HTTP/1.1 200 OK\r\nConnection: close\r\nAge: 526111\r\nCache-Control: max-age=604800\r\nContent-Type: text/html; charset=UTF-8\r\nDate: Mon, 14 Nov 2022 17:14:27 GMT\r\nEtag: "3147526947+ident+gzip"\r\nExpires: Mon, 21 Nov 2022 17:14:27 GMT\r\nLast-Modified: Thu, 17 Oct 2019 07:18:26 GMT\r\nServer: ECS (agb/A445)\r\nVary: Accept-Encoding\r\nX-Cache: HIT\r\n\r\n\n\n
\nThis domain is for use in illustrative examples in documents. You may use this\n domain in literature without prior coordination or asking for permission.
\n \n-----BEGIN PGP PRIVATE KEY BLOCK-----
+-----BEGIN PRIVATE KEY----- +MIICdwIBADANBgkqhkiG9w0BAQEFAASCAmEwggJdAgEAAoGBAOBY2pd9PSQvuxqu +WXFNVgILTWuUc721Wc2sFNvp4beowhUe1lfxaq5ZfCJcz7z4QsqFhOeks69O9UIb +oiOTDocPDog9PHO8yZXopHm0StFZvSjjKSNuFvy/WopPTGpxUZ5boCaF1CXumY7W +FL+jIap5faimLL9prIwaQKBwv80lAgMBAAECgYEAxvpHtgCgD849tqZYMgOTevCn +U/kwxltoMOClB39icNA+gxj8prc6FTTMwnVq0oGmS5UskX8k1yHCqUV1AvRU9o+q +I8L8a3F3TQKQieI/YjiUNK8A87bKkaiN65ooOnhT+I3ZjZMPR5YEyycimMp22jsv +LyX/35J/wf1rNiBs/YECQQDvtxgmMhE+PeajXqw1w2C3Jds27hI3RPDnamEyWr/L +KkSplbKTF6FuFDYOFdJNPrfxm1tx2MZ2cBfs+h/GnCJVAkEA75Z9w7q8obbqGBHW +9bpuFvLjW7bbqO7HBuXYX9zQcZL6GSArFP0ba5lhgH1qsVQfxVWVyiV9/chme7xc +ljfvkQJBAJ7MpSPQcRnRefNp6R0ok+5gFqt55PlWI1y6XS81bO7Szm+laooE0n0Q +yIpmLE3dqY9VgquVlkupkD/9poU0s40CQD118ZVAVht1/N9n1Cj9RjiE3mYspnTT +rCLM25Db6Gz6M0Y2xlaAB4S2uBhqE/Chj/TjW6WbsJJl0kRzsZynhMECQFYKiM1C +T4LB26ynW00VE8z4tEWSoYt4/Vn/5wFhalVjzoSJ8Hm2qZiObRYLQ1m0X4KnkShk +Gnl54dJHT+EhlfY= +-----END PRIVATE KEY-----