diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f02837b14..35f83a893 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -46,7 +46,7 @@ jobs: poetry install - name: Run tests run: | - poetry run pytest --exitfirst --reruns 2 -o timeout_func_only=true --timeout 600 --disable-warnings --log-cli-level=DEBUG --cov-config=bbot/test/coverage.cfg --cov-report xml:cov.xml --cov=bbot . + poetry run pytest --exitfirst --reruns 2 -o timeout_func_only=true --timeout 1200 --disable-warnings --log-cli-level=DEBUG --cov-config=bbot/test/coverage.cfg --cov-report xml:cov.xml --cov=bbot . - name: Upload Code Coverage uses: codecov/codecov-action@v3 with: diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 92905bd01..57d2108a8 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -867,16 +867,9 @@ def sanitize_data(self, data): parsed_path_lower = str(self.parsed.path).lower() - url_extension_blacklist = [] - url_extension_httpx_only = [] scan = getattr(self, "scan", None) - if scan is not None: - _url_extension_blacklist = scan.config.get("url_extension_blacklist", []) - _url_extension_httpx_only = scan.config.get("url_extension_httpx_only", []) - if _url_extension_blacklist: - url_extension_blacklist = [e.lower() for e in _url_extension_blacklist] - if _url_extension_httpx_only: - url_extension_httpx_only = [e.lower() for e in _url_extension_httpx_only] + url_extension_blacklist = getattr(scan, "url_extension_blacklist", []) + url_extension_httpx_only = getattr(scan, "url_extension_httpx_only", []) extension = get_file_extension(parsed_path_lower) if extension: @@ -934,6 +927,7 @@ class STORAGE_BUCKET(DictEvent, URL_UNVERIFIED): class _data_validator(BaseModel): name: str url: str + _validate_url = field_validator("url")(validators.validate_url) def _words(self): return self.data["name"] @@ -1009,6 +1003,7 @@ class _data_validator(BaseModel): severity: str description: str url: Optional[str] = None + _validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) _validate_severity = field_validator("severity")(validators.validate_severity) @@ -1023,6 +1018,7 @@ class _data_validator(BaseModel): host: str description: str url: Optional[str] = None + _validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) def _pretty_string(self): @@ -1034,6 +1030,7 @@ class _data_validator(BaseModel): host: str technology: str url: Optional[str] = None + _validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) def _data_id(self): @@ -1050,6 +1047,7 @@ class _data_validator(BaseModel): host: str vhost: str url: Optional[str] = None + _validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) def _pretty_string(self): diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 5ed85b31d..1baa559ea 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -99,6 +99,8 @@ url_extension_blacklist: - woff - woff2 - ttf + - sass + - scss # audio - mp3 - m4a diff --git a/bbot/modules/bucket_digitalocean.py b/bbot/modules/bucket_digitalocean.py index 5c92692f7..c467fde6e 100644 --- a/bbot/modules/bucket_digitalocean.py +++ b/bbot/modules/bucket_digitalocean.py @@ -17,4 +17,4 @@ class bucket_digitalocean(bucket_template): regions = ["ams3", "fra1", "nyc3", "sfo2", "sfo3", "sgp1"] def build_url(self, bucket_name, base_domain, region): - return f"https://{bucket_name}.{region}.{base_domain}" + return f"https://{bucket_name}.{region}.{base_domain}/" diff --git a/bbot/modules/bucket_file_enum.py b/bbot/modules/bucket_file_enum.py new file mode 100644 index 000000000..7eb6926c0 --- /dev/null +++ b/bbot/modules/bucket_file_enum.py @@ -0,0 +1,48 @@ +from bbot.modules.base import BaseModule +import xml.etree.ElementTree as ET + + +class bucket_file_enum(BaseModule): + """ + Enumerate files in a public bucket + """ + + watched_events = ["STORAGE_BUCKET"] + produced_events = ["URL_UNVERIFIED"] + meta = { + "description": "Works in conjunction with the filedownload module to download files from open storage buckets. Currently supported cloud providers: AWS" + } + flags = ["passive", "safe", "cloud-enum"] + options = { + "file_limit": 50, + } + options_desc = {"file_limit": "Limit the number of files downloaded per bucket"} + scope_distance_modifier = 2 + + async def setup(self): + self.file_limit = self.config.get("file_limit", 50) + return True + + async def handle_event(self, event): + cloud_tags = (t for t in event.tags if t.startswith("cloud-")) + if any(t.endswith("-amazon") or t.endswith("-digitalocean") for t in cloud_tags): + await self.handle_aws(event) + + async def handle_aws(self, event): + url = event.data["url"] + urls_emitted = 0 + response = await self.helpers.request(url) + status_code = getattr(response, "status_code", 0) + if status_code == 200: + content = response.text + root = ET.fromstring(content) + namespace = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"} + keys = [key.text for key in root.findall(".//s3:Key", namespace)] + for key in keys: + bucket_file = url + "/" + key + file_extension = self.helpers.get_file_extension(key) + if file_extension not in self.scan.url_extension_blacklist: + self.emit_event(bucket_file, "URL_UNVERIFIED", source=event, tags="filedownload") + urls_emitted += 1 + if urls_emitted >= self.file_limit: + return diff --git a/bbot/modules/filedownload.py b/bbot/modules/filedownload.py index ad4128e90..4b43e2834 100644 --- a/bbot/modules/filedownload.py +++ b/bbot/modules/filedownload.py @@ -77,7 +77,7 @@ class filedownload(BaseModule): "max_filesize": "Cancel download if filesize is greater than this size", } - scope_distance_modifier = 1 + scope_distance_modifier = 3 async def setup(self): self.extensions = list(set([e.lower().strip(".") for e in self.options.get("extensions", [])])) @@ -101,8 +101,11 @@ async def filter_event(self, event): # accept file download requests from other modules if "filedownload" in event.tags: return True - if self.hash_event(event) in self.urls_downloaded: - return False, f"Already processed {event}" + else: + if event.scope_distance > 1: + return False, f"{event} not within scope distance" + elif self.hash_event(event) in self.urls_downloaded: + return False, f"Already processed {event}" return True def hash_event(self, event): @@ -113,7 +116,9 @@ def hash_event(self, event): async def handle_event(self, event): if event.type == "URL_UNVERIFIED": url_lower = event.data.lower() - if any(url_lower.endswith(f".{e}") for e in self.extensions): + extension_matches = any(url_lower.endswith(f".{e}") for e in self.extensions) + filedownload_requested = "filedownload" in event.tags + if extension_matches or filedownload_requested: await self.download_file(event.data) elif event.type == "HTTP_RESPONSE": content_type = event.data["header"].get("content_type", "") diff --git a/bbot/modules/templates/bucket.py b/bbot/modules/templates/bucket.py index 2456d1449..eef8f5bee 100644 --- a/bbot/modules/templates/bucket.py +++ b/bbot/modules/templates/bucket.py @@ -126,7 +126,7 @@ def valid_bucket_name(self, bucket_name): return False def build_url(self, bucket_name, base_domain, region): - return f"https://{bucket_name}.{base_domain}" + return f"https://{bucket_name}.{base_domain}/" def gen_tags_exists(self, response): return set() diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 84c623300..cf9851a3c 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -230,6 +230,10 @@ def __init__( ) self.scope_report_distance = int(self.config.get("scope_report_distance", 1)) + # url file extensions + self.url_extension_blacklist = set(e.lower() for e in self.config.get("url_extension_blacklist", [])) + self.url_extension_httpx_only = set(e.lower() for e in self.config.get("url_extension_httpx_only", [])) + # custom HTTP headers warning self.custom_http_headers = self.config.get("http_headers", {}) if self.custom_http_headers: diff --git a/bbot/test/conftest.py b/bbot/test/conftest.py index 38069366b..4dcf8ed21 100644 --- a/bbot/test/conftest.py +++ b/bbot/test/conftest.py @@ -25,7 +25,7 @@ def pytest_sessionfinish(session, exitstatus): @pytest.fixture def non_mocked_hosts() -> list: - return ["127.0.0.1", "localhost", "githubusercontent.com"] + interactsh_servers + return ["127.0.0.1", "localhost", "raw.githubusercontent.com"] + interactsh_servers @pytest.fixture diff --git a/bbot/test/test_step_1/test_cloud_helpers.py b/bbot/test/test_step_1/test_cloud_helpers.py index 7dea8cb30..b42da11a7 100644 --- a/bbot/test/test_step_1/test_cloud_helpers.py +++ b/bbot/test/test_step_1/test_cloud_helpers.py @@ -9,7 +9,6 @@ async def test_cloud_helpers(bbot_scanner, bbot_config): for provider_name in provider_names: assert provider_name in scan1.helpers.cloud.providers.providers - log.critical(scan1.helpers.cloud.providers.providers) for p in scan1.helpers.cloud.providers.providers.values(): print(f"{p.name}: {p.domains} / {p.ranges}") amazon_ranges = list(scan1.helpers.cloud["amazon"].ranges) @@ -30,12 +29,10 @@ async def test_cloud_helpers(bbot_scanner, bbot_config): other_event3._resolved_hosts = {"asdf.amazonaws.com"} for event in (ip_event, aws_event1, aws_event2, aws_event4, other_event2, other_event3): - log.critical(event) await scan1.helpers.cloud.tag_event(event) assert "cloud-amazon" in event.tags, f"{event} was not properly cloud-tagged" for event in (aws_event3, other_event1): - log.critical(event) await scan1.helpers.cloud.tag_event(event) assert "cloud-amazon" not in event.tags, f"{event} was improperly cloud-tagged" assert not any( diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py b/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py index 0f3df7cf9..6d58dd36f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py @@ -37,9 +37,9 @@ def modules_overrides(self): return ["excavate", "speculate", "httpx", self.module_name] def url_setup(self): - self.url_1 = f"https://{self.random_bucket_1}" - self.url_2 = f"https://{self.random_bucket_2}" - self.url_3 = f"https://{self.random_bucket_3}" + self.url_1 = f"https://{self.random_bucket_1}/" + self.url_2 = f"https://{self.random_bucket_2}/" + self.url_3 = f"https://{self.random_bucket_3}/" def bucket_setup(self): self.url_setup() @@ -83,14 +83,14 @@ def check(self, module_test, events): url = e.data.get("url", "") assert self.random_bucket_2 in url assert not self.random_bucket_1 in url - assert not f"{self.random_bucket_3}" in url + assert not self.random_bucket_3 in url # make sure bucket mutations were found assert any( e.type == "STORAGE_BUCKET" and str(e.module) == self.module_name and f"{random_bucket_name_3}" in e.data["url"] for e in events - ), f'bucket (dev mutation) not found for module "{self.module_name}"' + ), f'bucket (dev mutation: {self.random_bucket_3}) not found for module "{self.module_name}"' class TestBucket_Amazon(Bucket_Amazon_Base): diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py new file mode 100644 index 000000000..6361fe681 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py @@ -0,0 +1,40 @@ +from .base import ModuleTestBase + + +class TestBucket_File_Enum(ModuleTestBase): + targets = ["http://127.0.0.1:8888"] + modules_overrides = ["bucket_file_enum", "filedownload", "httpx", "excavate"] + config_overrides = {"scope_report_distance": 5} + + open_bucket_url = "https://testbucket.s3.amazonaws.com/" + open_bucket_body = """testbucket1000falseindex.html2023-05-22T23:04:38.000Z"4a2d2d114f3abf90f8bd127c1f25095a"5STANDARDtest.pdf2022-04-30T21:13:40.000Z"723b0018c2f5a7ef06a34f84f6fa97e4"388901STANDARD""" + + pdf_data = """%PDF-1. +1 0 obj<>endobj +2 0 obj<>endobj +3 0 obj<>endobj +trailer <>""" + + async def setup_before_prep(self, module_test): + module_test.httpserver.expect_request("/").respond_with_data(f'') + module_test.httpx_mock.add_response( + url=self.open_bucket_url, + text=self.open_bucket_body, + ) + module_test.httpx_mock.add_response( + url=f"{self.open_bucket_url}test.pdf", + text=self.pdf_data, + headers={"Content-Type": "application/pdf"}, + ) + module_test.httpx_mock.add_response( + url=f"{self.open_bucket_url}test.css", + text="", + ) + + def check(self, module_test, events): + download_dir = module_test.scan.home / "filedownload" + files = list(download_dir.glob("*.pdf")) + assert any(e.type == "URL_UNVERIFIED" and e.data.endswith("test.pdf") for e in events) + assert not any(e.type == "URL_UNVERIFIED" and e.data.endswith("test.css") for e in events) + assert any(f.name.endswith("test.pdf") for f in files), "Failed to download PDF file from open bucket" + assert not any(f.name.endswith("test.css") for f in files), "Unwanted CSS file was downloaded" diff --git a/bbot/test/test_step_2/module_tests/test_module_filedownload.py b/bbot/test/test_step_2/module_tests/test_module_filedownload.py index e4471d159..2c04003e1 100644 --- a/bbot/test/test_step_2/module_tests/test_module_filedownload.py +++ b/bbot/test/test_step_2/module_tests/test_module_filedownload.py @@ -12,14 +12,6 @@ class TestFileDownload(ModuleTestBase): 3 0 obj<>endobj trailer <>""" - async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( - url="https://raw.githubusercontent.com/jshttp/mime-db/master/db.json", - json={ - "application/pdf": {"source": "iana", "compressible": False, "extensions": ["pdf"]}, - }, - ) - async def setup_after_prep(self, module_test): module_test.set_expect_requests( dict(uri="/"), diff --git a/bbot/test/test_step_2/module_tests/test_module_massdns.py b/bbot/test/test_step_2/module_tests/test_module_massdns.py index 04f4860dd..1b4543788 100644 --- a/bbot/test/test_step_2/module_tests/test_module_massdns.py +++ b/bbot/test/test_step_2/module_tests/test_module_massdns.py @@ -5,12 +5,6 @@ class TestMassdns(ModuleTestBase): subdomain_wordlist = tempwordlist(["www", "asdf"]) config_overrides = {"modules": {"massdns": {"wordlist": str(subdomain_wordlist)}}} - async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( - url="https://raw.githubusercontent.com/blacklanternsecurity/public-dns-servers/master/nameservers.txt", - text="8.8.8.8\n8.8.4.4\n1.1.1.1", - ) - def check(self, module_test, events): assert any(e.data == "www.blacklanternsecurity.com" for e in events) assert not any(e.data == "asdf.blacklanternsecurity.com" for e in events) diff --git a/bbot/test/test_step_2/module_tests/test_module_secretsdb.py b/bbot/test/test_step_2/module_tests/test_module_secretsdb.py index 67b6e28bc..f735035bc 100644 --- a/bbot/test/test_step_2/module_tests/test_module_secretsdb.py +++ b/bbot/test/test_step_2/module_tests/test_module_secretsdb.py @@ -6,14 +6,6 @@ class TestSecretsDB(ModuleTestBase): modules_overrides = ["httpx", "secretsdb"] async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( - url="https://raw.githubusercontent.com/blacklanternsecurity/secrets-patterns-db/master/db/rules-stable.yml", - text="""patterns: -- pattern: - confidence: 99 - name: Asymmetric Private Key - regex: '-----BEGIN ((EC|PGP|DSA|RSA|OPENSSH) )?PRIVATE KEY( BLOCK)?-----'""", - ) expect_args = {"method": "GET", "uri": "/"} respond_args = {"response_data": "-----BEGIN PGP PRIVATE KEY BLOCK-----"} module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) diff --git a/bbot/test/test_step_2/module_tests/test_module_subdomain_hijack.py b/bbot/test/test_step_2/module_tests/test_module_subdomain_hijack.py index ace21d352..7aa6a6680 100644 --- a/bbot/test/test_step_2/module_tests/test_module_subdomain_hijack.py +++ b/bbot/test/test_step_2/module_tests/test_module_subdomain_hijack.py @@ -5,25 +5,6 @@ class TestSubdomain_Hijack(ModuleTestBase): targets = ["http://127.0.0.1:8888"] modules_overrides = ["httpx", "excavate", "subdomain_hijack"] - async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( - url="https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json", - json=[ - { - "cicd_pass": True, - "cname": ["us-east-1.elasticbeanstalk.com"], - "discussion": "[Issue #194](https://github.com/EdOverflow/can-i-take-over-xyz/issues/194)", - "documentation": "", - "fingerprint": "NXDOMAIN", - "http_status": None, - "nxdomain": True, - "service": "AWS/Elastic Beanstalk", - "status": "Vulnerable", - "vulnerable": True, - } - ], - ) - async def setup_after_prep(self, module_test): fingerprints = module_test.module.fingerprints assert fingerprints, "No subdomain hijacking fingerprints available" diff --git a/bbot/test/test_step_2/module_tests/test_module_web_report.py b/bbot/test/test_step_2/module_tests/test_module_web_report.py index aa51d501a..a37c178e2 100644 --- a/bbot/test/test_step_2/module_tests/test_module_web_report.py +++ b/bbot/test/test_step_2/module_tests/test_module_web_report.py @@ -7,20 +7,14 @@ class TestWebReport(ModuleTestBase): async def setup_before_prep(self, module_test): # secretsdb --> FINDING - module_test.httpx_mock.add_response( - url="https://raw.githubusercontent.com/blacklanternsecurity/secrets-patterns-db/master/db/rules-stable.yml", - text="""patterns: -- pattern: - confidence: 99 - name: Asymmetric Private Key - regex: '-----BEGIN ((EC|PGP|DSA|RSA|OPENSSH) )?PRIVATE KEY( BLOCK)?-----'""", - ) # wappalyzer --> TECHNOLOGY # badsecrets --> VULNERABILITY respond_args = {"response_data": web_body} module_test.set_expect_requests(respond_args=respond_args) def check(self, module_test, events): + for e in events: + module_test.log.critical(e) report_file = module_test.scan.home / "web_report.html" with open(report_file) as f: report_content = f.read() @@ -31,12 +25,7 @@ def check(self, module_test, events):
  • http://127.0.0.1:8888/""" in report_content ) - assert ( - """

    FINDING

    -