diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index bc6b804518..57d2108a85 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -867,16 +867,9 @@ def sanitize_data(self, data): parsed_path_lower = str(self.parsed.path).lower() - url_extension_blacklist = [] - url_extension_httpx_only = [] scan = getattr(self, "scan", None) - if scan is not None: - _url_extension_blacklist = scan.config.get("url_extension_blacklist", []) - _url_extension_httpx_only = scan.config.get("url_extension_httpx_only", []) - if _url_extension_blacklist: - url_extension_blacklist = [e.lower() for e in _url_extension_blacklist] - if _url_extension_httpx_only: - url_extension_httpx_only = [e.lower() for e in _url_extension_httpx_only] + url_extension_blacklist = getattr(scan, "url_extension_blacklist", []) + url_extension_httpx_only = getattr(scan, "url_extension_httpx_only", []) extension = get_file_extension(parsed_path_lower) if extension: diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 5ed85b31dd..1baa559ea4 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -99,6 +99,8 @@ url_extension_blacklist: - woff - woff2 - ttf + - sass + - scss # audio - mp3 - m4a diff --git a/bbot/modules/bucket_file_enum.py b/bbot/modules/bucket_file_enum.py index 3375d8e10f..7eb6926c0f 100644 --- a/bbot/modules/bucket_file_enum.py +++ b/bbot/modules/bucket_file_enum.py @@ -13,8 +13,16 @@ class bucket_file_enum(BaseModule): "description": "Works in conjunction with the filedownload module to download files from open storage buckets. Currently supported cloud providers: AWS" } flags = ["passive", "safe", "cloud-enum"] + options = { + "file_limit": 50, + } + options_desc = {"file_limit": "Limit the number of files downloaded per bucket"} scope_distance_modifier = 2 + async def setup(self): + self.file_limit = self.config.get("file_limit", 50) + return True + async def handle_event(self, event): cloud_tags = (t for t in event.tags if t.startswith("cloud-")) if any(t.endswith("-amazon") or t.endswith("-digitalocean") for t in cloud_tags): @@ -22,6 +30,7 @@ async def handle_event(self, event): async def handle_aws(self, event): url = event.data["url"] + urls_emitted = 0 response = await self.helpers.request(url) status_code = getattr(response, "status_code", 0) if status_code == 200: @@ -31,4 +40,9 @@ async def handle_aws(self, event): keys = [key.text for key in root.findall(".//s3:Key", namespace)] for key in keys: bucket_file = url + "/" + key - self.emit_event(bucket_file, "URL_UNVERIFIED", source=event, tags="filedownload") + file_extension = self.helpers.get_file_extension(key) + if file_extension not in self.scan.url_extension_blacklist: + self.emit_event(bucket_file, "URL_UNVERIFIED", source=event, tags="filedownload") + urls_emitted += 1 + if urls_emitted >= self.file_limit: + return diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 84c6233006..cf9851a3c9 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -230,6 +230,10 @@ def __init__( ) self.scope_report_distance = int(self.config.get("scope_report_distance", 1)) + # url file extensions + self.url_extension_blacklist = set(e.lower() for e in self.config.get("url_extension_blacklist", [])) + self.url_extension_httpx_only = set(e.lower() for e in self.config.get("url_extension_httpx_only", [])) + # custom HTTP headers warning self.custom_http_headers = self.config.get("http_headers", {}) if self.custom_http_headers: diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py index 33f9a33c89..6361fe681c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py @@ -4,6 +4,7 @@ class TestBucket_File_Enum(ModuleTestBase): targets = ["http://127.0.0.1:8888"] modules_overrides = ["bucket_file_enum", "filedownload", "httpx", "excavate"] + config_overrides = {"scope_report_distance": 5} open_bucket_url = "https://testbucket.s3.amazonaws.com/" open_bucket_body = """testbucket1000falseindex.html2023-05-22T23:04:38.000Z"4a2d2d114f3abf90f8bd127c1f25095a"5STANDARDtest.pdf2022-04-30T21:13:40.000Z"723b0018c2f5a7ef06a34f84f6fa97e4"388901STANDARD""" @@ -25,8 +26,15 @@ async def setup_before_prep(self, module_test): text=self.pdf_data, headers={"Content-Type": "application/pdf"}, ) + module_test.httpx_mock.add_response( + url=f"{self.open_bucket_url}test.css", + text="", + ) def check(self, module_test, events): download_dir = module_test.scan.home / "filedownload" files = list(download_dir.glob("*.pdf")) + assert any(e.type == "URL_UNVERIFIED" and e.data.endswith("test.pdf") for e in events) + assert not any(e.type == "URL_UNVERIFIED" and e.data.endswith("test.css") for e in events) assert any(f.name.endswith("test.pdf") for f in files), "Failed to download PDF file from open bucket" + assert not any(f.name.endswith("test.css") for f in files), "Unwanted CSS file was downloaded"