From 7b6264557c2a7043c15a2d9f6c7a8ca74c87a9ac Mon Sep 17 00:00:00 2001 From: Aconite33 Date: Wed, 18 Oct 2023 11:49:26 -0600 Subject: [PATCH 1/9] Created module for enumerating AWS S3 Bucket files. --- bbot/modules/bucket_enum.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 bbot/modules/bucket_enum.py diff --git a/bbot/modules/bucket_enum.py b/bbot/modules/bucket_enum.py new file mode 100644 index 000000000..207a1573e --- /dev/null +++ b/bbot/modules/bucket_enum.py @@ -0,0 +1,24 @@ +from bbot.modules.base import BaseModule +import xml.etree.ElementTree as ET + +class bucket_enum(BaseModule): + """ + Enumerate files in a public bucket + """ + scope_distance_modifier = 1 + watched_events = ["STORAGE_BUCKET"] + produced_events = ["BUCKET_FILE"] + flags = ["passive", "safe", "cloud-enum"] + + async def handle_event(self, event): + url = event.data["url"] + response = await self.helpers.request(url) + if response.status_code == 200: + content = response.text + root = ET.fromstring(content) + namespace = {'s3': 'http://s3.amazonaws.com/doc/2006-03-01/'} + keys = [key.text for key in root.findall('.//s3:Key', namespace)] + self.hugesuccess(f"Keys: {keys}") + for key in keys: + bucket_file = url + "/" + key + self.emit_event(bucket_file, "BUCKET_FILE", source=event) \ No newline at end of file From 45df5ae7447af6daa5cd2facd3f08455abfe8381 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 13 Nov 2023 12:39:36 -0500 Subject: [PATCH 2/9] add tests --- bbot/core/event/base.py | 5 +++ bbot/modules/bucket_digitalocean.py | 2 +- bbot/modules/bucket_enum.py | 24 ------------- bbot/modules/bucket_file_enum.py | 34 +++++++++++++++++++ bbot/modules/filedownload.py | 13 ++++--- bbot/modules/templates/bucket.py | 2 +- bbot/test/conftest.py | 4 +-- bbot/test/test_step_1/test_cloud_helpers.py | 3 -- .../module_tests/test_module_bucket_amazon.py | 10 +++--- .../test_module_bucket_file_enum.py | 32 +++++++++++++++++ 10 files changed, 89 insertions(+), 40 deletions(-) delete mode 100644 bbot/modules/bucket_enum.py create mode 100644 bbot/modules/bucket_file_enum.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 92905bd01..bc6b80451 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -934,6 +934,7 @@ class STORAGE_BUCKET(DictEvent, URL_UNVERIFIED): class _data_validator(BaseModel): name: str url: str + _validate_url = field_validator("url")(validators.validate_url) def _words(self): return self.data["name"] @@ -1009,6 +1010,7 @@ class _data_validator(BaseModel): severity: str description: str url: Optional[str] = None + _validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) _validate_severity = field_validator("severity")(validators.validate_severity) @@ -1023,6 +1025,7 @@ class _data_validator(BaseModel): host: str description: str url: Optional[str] = None + _validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) def _pretty_string(self): @@ -1034,6 +1037,7 @@ class _data_validator(BaseModel): host: str technology: str url: Optional[str] = None + _validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) def _data_id(self): @@ -1050,6 +1054,7 @@ class _data_validator(BaseModel): host: str vhost: str url: Optional[str] = None + 
_validate_url = field_validator("url")(validators.validate_url) _validate_host = field_validator("host")(validators.validate_host) def _pretty_string(self): diff --git a/bbot/modules/bucket_digitalocean.py b/bbot/modules/bucket_digitalocean.py index 5c92692f7..c467fde6e 100644 --- a/bbot/modules/bucket_digitalocean.py +++ b/bbot/modules/bucket_digitalocean.py @@ -17,4 +17,4 @@ class bucket_digitalocean(bucket_template): regions = ["ams3", "fra1", "nyc3", "sfo2", "sfo3", "sgp1"] def build_url(self, bucket_name, base_domain, region): - return f"https://{bucket_name}.{region}.{base_domain}" + return f"https://{bucket_name}.{region}.{base_domain}/" diff --git a/bbot/modules/bucket_enum.py b/bbot/modules/bucket_enum.py deleted file mode 100644 index 207a1573e..000000000 --- a/bbot/modules/bucket_enum.py +++ /dev/null @@ -1,24 +0,0 @@ -from bbot.modules.base import BaseModule -import xml.etree.ElementTree as ET - -class bucket_enum(BaseModule): - """ - Enumerate files in a public bucket - """ - scope_distance_modifier = 1 - watched_events = ["STORAGE_BUCKET"] - produced_events = ["BUCKET_FILE"] - flags = ["passive", "safe", "cloud-enum"] - - async def handle_event(self, event): - url = event.data["url"] - response = await self.helpers.request(url) - if response.status_code == 200: - content = response.text - root = ET.fromstring(content) - namespace = {'s3': 'http://s3.amazonaws.com/doc/2006-03-01/'} - keys = [key.text for key in root.findall('.//s3:Key', namespace)] - self.hugesuccess(f"Keys: {keys}") - for key in keys: - bucket_file = url + "/" + key - self.emit_event(bucket_file, "BUCKET_FILE", source=event) \ No newline at end of file diff --git a/bbot/modules/bucket_file_enum.py b/bbot/modules/bucket_file_enum.py new file mode 100644 index 000000000..3375d8e10 --- /dev/null +++ b/bbot/modules/bucket_file_enum.py @@ -0,0 +1,34 @@ +from bbot.modules.base import BaseModule +import xml.etree.ElementTree as ET + + +class bucket_file_enum(BaseModule): + """ + Enumerate files in a public bucket + """ + + watched_events = ["STORAGE_BUCKET"] + produced_events = ["URL_UNVERIFIED"] + meta = { + "description": "Works in conjunction with the filedownload module to download files from open storage buckets. 
Currently supported cloud providers: AWS" + } + flags = ["passive", "safe", "cloud-enum"] + scope_distance_modifier = 2 + + async def handle_event(self, event): + cloud_tags = (t for t in event.tags if t.startswith("cloud-")) + if any(t.endswith("-amazon") or t.endswith("-digitalocean") for t in cloud_tags): + await self.handle_aws(event) + + async def handle_aws(self, event): + url = event.data["url"] + response = await self.helpers.request(url) + status_code = getattr(response, "status_code", 0) + if status_code == 200: + content = response.text + root = ET.fromstring(content) + namespace = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"} + keys = [key.text for key in root.findall(".//s3:Key", namespace)] + for key in keys: + bucket_file = url + "/" + key + self.emit_event(bucket_file, "URL_UNVERIFIED", source=event, tags="filedownload") diff --git a/bbot/modules/filedownload.py b/bbot/modules/filedownload.py index ad4128e90..4b43e2834 100644 --- a/bbot/modules/filedownload.py +++ b/bbot/modules/filedownload.py @@ -77,7 +77,7 @@ class filedownload(BaseModule): "max_filesize": "Cancel download if filesize is greater than this size", } - scope_distance_modifier = 1 + scope_distance_modifier = 3 async def setup(self): self.extensions = list(set([e.lower().strip(".") for e in self.options.get("extensions", [])])) @@ -101,8 +101,11 @@ async def filter_event(self, event): # accept file download requests from other modules if "filedownload" in event.tags: return True - if self.hash_event(event) in self.urls_downloaded: - return False, f"Already processed {event}" + else: + if event.scope_distance > 1: + return False, f"{event} not within scope distance" + elif self.hash_event(event) in self.urls_downloaded: + return False, f"Already processed {event}" return True def hash_event(self, event): @@ -113,7 +116,9 @@ def hash_event(self, event): async def handle_event(self, event): if event.type == "URL_UNVERIFIED": url_lower = event.data.lower() - if any(url_lower.endswith(f".{e}") for e in self.extensions): + extension_matches = any(url_lower.endswith(f".{e}") for e in self.extensions) + filedownload_requested = "filedownload" in event.tags + if extension_matches or filedownload_requested: await self.download_file(event.data) elif event.type == "HTTP_RESPONSE": content_type = event.data["header"].get("content_type", "") diff --git a/bbot/modules/templates/bucket.py b/bbot/modules/templates/bucket.py index 2456d1449..eef8f5bee 100644 --- a/bbot/modules/templates/bucket.py +++ b/bbot/modules/templates/bucket.py @@ -126,7 +126,7 @@ def valid_bucket_name(self, bucket_name): return False def build_url(self, bucket_name, base_domain, region): - return f"https://{bucket_name}.{base_domain}" + return f"https://{bucket_name}.{base_domain}/" def gen_tags_exists(self, response): return set() diff --git a/bbot/test/conftest.py b/bbot/test/conftest.py index 38069366b..15ef7ecc2 100644 --- a/bbot/test/conftest.py +++ b/bbot/test/conftest.py @@ -18,14 +18,14 @@ def pytest_sessionfinish(session, exitstatus): logger.removeHandler(handler) # Wipe out BBOT home dir - shutil.rmtree("/tmp/.bbot_test", ignore_errors=True) + # shutil.rmtree("/tmp/.bbot_test", ignore_errors=True) yield @pytest.fixture def non_mocked_hosts() -> list: - return ["127.0.0.1", "localhost", "githubusercontent.com"] + interactsh_servers + return ["127.0.0.1", "localhost", "raw.githubusercontent.com"] + interactsh_servers @pytest.fixture diff --git a/bbot/test/test_step_1/test_cloud_helpers.py b/bbot/test/test_step_1/test_cloud_helpers.py index 
7dea8cb30..b42da11a7 100644 --- a/bbot/test/test_step_1/test_cloud_helpers.py +++ b/bbot/test/test_step_1/test_cloud_helpers.py @@ -9,7 +9,6 @@ async def test_cloud_helpers(bbot_scanner, bbot_config): for provider_name in provider_names: assert provider_name in scan1.helpers.cloud.providers.providers - log.critical(scan1.helpers.cloud.providers.providers) for p in scan1.helpers.cloud.providers.providers.values(): print(f"{p.name}: {p.domains} / {p.ranges}") amazon_ranges = list(scan1.helpers.cloud["amazon"].ranges) @@ -30,12 +29,10 @@ async def test_cloud_helpers(bbot_scanner, bbot_config): other_event3._resolved_hosts = {"asdf.amazonaws.com"} for event in (ip_event, aws_event1, aws_event2, aws_event4, other_event2, other_event3): - log.critical(event) await scan1.helpers.cloud.tag_event(event) assert "cloud-amazon" in event.tags, f"{event} was not properly cloud-tagged" for event in (aws_event3, other_event1): - log.critical(event) await scan1.helpers.cloud.tag_event(event) assert "cloud-amazon" not in event.tags, f"{event} was improperly cloud-tagged" assert not any( diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py b/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py index 0f3df7cf9..6d58dd36f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py @@ -37,9 +37,9 @@ def modules_overrides(self): return ["excavate", "speculate", "httpx", self.module_name] def url_setup(self): - self.url_1 = f"https://{self.random_bucket_1}" - self.url_2 = f"https://{self.random_bucket_2}" - self.url_3 = f"https://{self.random_bucket_3}" + self.url_1 = f"https://{self.random_bucket_1}/" + self.url_2 = f"https://{self.random_bucket_2}/" + self.url_3 = f"https://{self.random_bucket_3}/" def bucket_setup(self): self.url_setup() @@ -83,14 +83,14 @@ def check(self, module_test, events): url = e.data.get("url", "") assert self.random_bucket_2 in url assert not self.random_bucket_1 in url - assert not f"{self.random_bucket_3}" in url + assert not self.random_bucket_3 in url # make sure bucket mutations were found assert any( e.type == "STORAGE_BUCKET" and str(e.module) == self.module_name and f"{random_bucket_name_3}" in e.data["url"] for e in events - ), f'bucket (dev mutation) not found for module "{self.module_name}"' + ), f'bucket (dev mutation: {self.random_bucket_3}) not found for module "{self.module_name}"' class TestBucket_Amazon(Bucket_Amazon_Base): diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py new file mode 100644 index 000000000..33f9a33c8 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py @@ -0,0 +1,32 @@ +from .base import ModuleTestBase + + +class TestBucket_File_Enum(ModuleTestBase): + targets = ["http://127.0.0.1:8888"] + modules_overrides = ["bucket_file_enum", "filedownload", "httpx", "excavate"] + + open_bucket_url = "https://testbucket.s3.amazonaws.com/" + open_bucket_body = """testbucket1000falseindex.html2023-05-22T23:04:38.000Z"4a2d2d114f3abf90f8bd127c1f25095a"5STANDARDtest.pdf2022-04-30T21:13:40.000Z"723b0018c2f5a7ef06a34f84f6fa97e4"388901STANDARD""" + + pdf_data = """%PDF-1. 
+1 0 obj<>endobj +2 0 obj<>endobj +3 0 obj<>endobj +trailer <>""" + + async def setup_before_prep(self, module_test): + module_test.httpserver.expect_request("/").respond_with_data(f'') + module_test.httpx_mock.add_response( + url=self.open_bucket_url, + text=self.open_bucket_body, + ) + module_test.httpx_mock.add_response( + url=f"{self.open_bucket_url}test.pdf", + text=self.pdf_data, + headers={"Content-Type": "application/pdf"}, + ) + + def check(self, module_test, events): + download_dir = module_test.scan.home / "filedownload" + files = list(download_dir.glob("*.pdf")) + assert any(f.name.endswith("test.pdf") for f in files), "Failed to download PDF file from open bucket" From 9618739811b3330fddb6e46bb85a1fc0b9cd74a4 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 13 Nov 2023 16:15:11 -0500 Subject: [PATCH 3/9] fixed tests --- .../module_tests/test_module_filedownload.py | 8 -------- .../module_tests/test_module_massdns.py | 6 ------ .../module_tests/test_module_secretsdb.py | 8 -------- .../test_module_subdomain_hijack.py | 19 ------------------- 4 files changed, 41 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_filedownload.py b/bbot/test/test_step_2/module_tests/test_module_filedownload.py index e4471d159..2c04003e1 100644 --- a/bbot/test/test_step_2/module_tests/test_module_filedownload.py +++ b/bbot/test/test_step_2/module_tests/test_module_filedownload.py @@ -12,14 +12,6 @@ class TestFileDownload(ModuleTestBase): 3 0 obj<>endobj trailer <>""" - async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( - url="https://raw.githubusercontent.com/jshttp/mime-db/master/db.json", - json={ - "application/pdf": {"source": "iana", "compressible": False, "extensions": ["pdf"]}, - }, - ) - async def setup_after_prep(self, module_test): module_test.set_expect_requests( dict(uri="/"), diff --git a/bbot/test/test_step_2/module_tests/test_module_massdns.py b/bbot/test/test_step_2/module_tests/test_module_massdns.py index 04f4860dd..1b4543788 100644 --- a/bbot/test/test_step_2/module_tests/test_module_massdns.py +++ b/bbot/test/test_step_2/module_tests/test_module_massdns.py @@ -5,12 +5,6 @@ class TestMassdns(ModuleTestBase): subdomain_wordlist = tempwordlist(["www", "asdf"]) config_overrides = {"modules": {"massdns": {"wordlist": str(subdomain_wordlist)}}} - async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( - url="https://raw.githubusercontent.com/blacklanternsecurity/public-dns-servers/master/nameservers.txt", - text="8.8.8.8\n8.8.4.4\n1.1.1.1", - ) - def check(self, module_test, events): assert any(e.data == "www.blacklanternsecurity.com" for e in events) assert not any(e.data == "asdf.blacklanternsecurity.com" for e in events) diff --git a/bbot/test/test_step_2/module_tests/test_module_secretsdb.py b/bbot/test/test_step_2/module_tests/test_module_secretsdb.py index 67b6e28bc..f735035bc 100644 --- a/bbot/test/test_step_2/module_tests/test_module_secretsdb.py +++ b/bbot/test/test_step_2/module_tests/test_module_secretsdb.py @@ -6,14 +6,6 @@ class TestSecretsDB(ModuleTestBase): modules_overrides = ["httpx", "secretsdb"] async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( - url="https://raw.githubusercontent.com/blacklanternsecurity/secrets-patterns-db/master/db/rules-stable.yml", - text="""patterns: -- pattern: - confidence: 99 - name: Asymmetric Private Key - regex: '-----BEGIN ((EC|PGP|DSA|RSA|OPENSSH) )?PRIVATE KEY( BLOCK)?-----'""", - ) expect_args = 
{"method": "GET", "uri": "/"} respond_args = {"response_data": "-----BEGIN PGP PRIVATE KEY BLOCK-----"} module_test.set_expect_requests(expect_args=expect_args, respond_args=respond_args) diff --git a/bbot/test/test_step_2/module_tests/test_module_subdomain_hijack.py b/bbot/test/test_step_2/module_tests/test_module_subdomain_hijack.py index ace21d352..7aa6a6680 100644 --- a/bbot/test/test_step_2/module_tests/test_module_subdomain_hijack.py +++ b/bbot/test/test_step_2/module_tests/test_module_subdomain_hijack.py @@ -5,25 +5,6 @@ class TestSubdomain_Hijack(ModuleTestBase): targets = ["http://127.0.0.1:8888"] modules_overrides = ["httpx", "excavate", "subdomain_hijack"] - async def setup_before_prep(self, module_test): - module_test.httpx_mock.add_response( - url="https://raw.githubusercontent.com/EdOverflow/can-i-take-over-xyz/master/fingerprints.json", - json=[ - { - "cicd_pass": True, - "cname": ["us-east-1.elasticbeanstalk.com"], - "discussion": "[Issue #194](https://github.com/EdOverflow/can-i-take-over-xyz/issues/194)", - "documentation": "", - "fingerprint": "NXDOMAIN", - "http_status": None, - "nxdomain": True, - "service": "AWS/Elastic Beanstalk", - "status": "Vulnerable", - "vulnerable": True, - } - ], - ) - async def setup_after_prep(self, module_test): fingerprints = module_test.module.fingerprints assert fingerprints, "No subdomain hijacking fingerprints available" From 110cac2623151a7069247a16666eb8fc3a211615 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 13 Nov 2023 17:01:05 -0500 Subject: [PATCH 4/9] fix tests again --- bbot/test/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bbot/test/conftest.py b/bbot/test/conftest.py index 15ef7ecc2..4dcf8ed21 100644 --- a/bbot/test/conftest.py +++ b/bbot/test/conftest.py @@ -18,7 +18,7 @@ def pytest_sessionfinish(session, exitstatus): logger.removeHandler(handler) # Wipe out BBOT home dir - # shutil.rmtree("/tmp/.bbot_test", ignore_errors=True) + shutil.rmtree("/tmp/.bbot_test", ignore_errors=True) yield From 404433c488c368454f63cc7f3f21c20b7ab3825c Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 13 Nov 2023 20:03:05 -0500 Subject: [PATCH 5/9] fixed tests again --- .../module_tests/test_module_web_report.py | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_web_report.py b/bbot/test/test_step_2/module_tests/test_module_web_report.py index aa51d501a..a37c178e2 100644 --- a/bbot/test/test_step_2/module_tests/test_module_web_report.py +++ b/bbot/test/test_step_2/module_tests/test_module_web_report.py @@ -7,20 +7,14 @@ class TestWebReport(ModuleTestBase): async def setup_before_prep(self, module_test): # secretsdb --> FINDING - module_test.httpx_mock.add_response( - url="https://raw.githubusercontent.com/blacklanternsecurity/secrets-patterns-db/master/db/rules-stable.yml", - text="""patterns: -- pattern: - confidence: 99 - name: Asymmetric Private Key - regex: '-----BEGIN ((EC|PGP|DSA|RSA|OPENSSH) )?PRIVATE KEY( BLOCK)?-----'""", - ) # wappalyzer --> TECHNOLOGY # badsecrets --> VULNERABILITY respond_args = {"response_data": web_body} module_test.set_expect_requests(respond_args=respond_args) def check(self, module_test, events): + for e in events: + module_test.log.critical(e) report_file = module_test.scan.home / "web_report.html" with open(report_file) as f: report_content = f.read() @@ -31,12 +25,7 @@ def check(self, module_test, events):
            • http://127.0.0.1:8888/"""
             in report_content
         )
-        assert (
-            """FINDING
-            • Possible secret (Asymmetric Private Key)"""
-            in report_content
-        )
+        assert """Possible secret (Asymmetric Private Key)""" in report_content
         assert "TECHNOLOGY" in report_content
         assert "flask
      " in report_content From 1af9c00ee801a26c52a3e11caf0270af6db9765e Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Tue, 14 Nov 2023 11:26:06 -0500 Subject: [PATCH 6/9] bump test timeouts --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f02837b14..35f83a893 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -46,7 +46,7 @@ jobs: poetry install - name: Run tests run: | - poetry run pytest --exitfirst --reruns 2 -o timeout_func_only=true --timeout 600 --disable-warnings --log-cli-level=DEBUG --cov-config=bbot/test/coverage.cfg --cov-report xml:cov.xml --cov=bbot . + poetry run pytest --exitfirst --reruns 2 -o timeout_func_only=true --timeout 1200 --disable-warnings --log-cli-level=DEBUG --cov-config=bbot/test/coverage.cfg --cov-report xml:cov.xml --cov=bbot . - name: Upload Code Coverage uses: codecov/codecov-action@v3 with: From 12ce177af2afa20166d417cb03cc6ab822731940 Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Thu, 16 Nov 2023 16:40:52 -0500 Subject: [PATCH 7/9] test troubleshooting --- bbot/scanner/manager.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index 13d46669d..02c78a7e9 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -175,6 +175,8 @@ async def _emit_event(self, event, **kwargs): on_success_callback = kwargs.pop("on_success_callback", None) abort_if = kwargs.pop("abort_if", None) + log.debug(f"EMIT {event} 1") + # skip DNS resolution if it's disabled in the config and the event is a target and we don't have a blacklist skip_dns_resolution = (not self.dns_resolution) and "target" in event.tags and not self.scan.blacklist if skip_dns_resolution: @@ -198,6 +200,8 @@ async def _emit_event(self, event, **kwargs): for ip in ips: resolved_hosts.add(ip) + log.debug(f"EMIT {event} 2") + # kill runaway DNS chains dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) if dns_resolve_distance >= self.scan.helpers.dns.max_dns_resolve_distance: @@ -206,6 +210,8 @@ async def _emit_event(self, event, **kwargs): ) dns_children = {} + log.debug(f"EMIT {event} 3") + if event.type in ("DNS_NAME", "IP_ADDRESS"): for tag in dns_tags: event.add_tag(tag) @@ -222,6 +228,8 @@ async def _emit_event(self, event, **kwargs): log.debug(f"Omitting due to blacklisted {reason}: {event}") return + log.debug(f"EMIT {event} 4") + # DNS_NAME --> DNS_NAME_UNRESOLVED if event.type == "DNS_NAME" and "unresolved" in event.tags and not "target" in event.tags: event.type = "DNS_NAME_UNRESOLVED" @@ -229,6 +237,8 @@ async def _emit_event(self, event, **kwargs): # Cloud tagging await self.scan.helpers.cloud.tag_event(event) + log.debug(f"EMIT {event} 5") + # Scope shepherding # here is where we make sure in-scope events are set to their proper scope distance if event.host and event_whitelisted: @@ -243,18 +253,24 @@ async def _emit_event(self, event, **kwargs): ) event.internal = True + log.debug(f"EMIT {event} 6") + # check for wildcards if event.scope_distance <= self.scan.scope_search_distance: if not "unresolved" in event.tags: if not self.scan.helpers.is_ip_type(event.host): await self.scan.helpers.dns.handle_wildcard_event(event, dns_children) + log.debug(f"EMIT {event} 7") + # For DNS_NAMEs, we've waited to do this until now, in case event.data changed during handle_wildcard_event() if event.type == "DNS_NAME": acceptable = 
self._event_precheck(event) if not acceptable: return + log.debug(f"EMIT {event} 8") + # if we discovered something interesting from an internal event, # make sure we preserve its chain of parents source = event.source @@ -267,6 +283,8 @@ async def _emit_event(self, event, **kwargs): log.debug(f"Re-queuing internal event {source} with parent {event}") self.queue_event(source) + log.debug(f"EMIT {event} 9") + # now that the event is properly tagged, we can finally make decisions about it abort_result = False if callable(abort_if): @@ -280,14 +298,20 @@ async def _emit_event(self, event, **kwargs): log.debug(msg) return + log.debug(f"EMIT {event} 10") + # run success callback before distributing event (so it can add tags, etc.) if callable(on_success_callback): async with self.scan._acatch(context=on_success_callback): await self.scan.helpers.execute_sync_or_async(on_success_callback, event) + log.debug(f"EMIT {event} 11") + await self.distribute_event(event) event_distributed = True + log.debug(f"EMIT {event} 12") + # speculate DNS_NAMES and IP_ADDRESSes from other event types source_event = event if ( @@ -305,6 +329,8 @@ async def _emit_event(self, event, **kwargs): source_event.add_tag("target") self.queue_event(source_event) + log.debug(f"EMIT {event} 13") + ### Emit DNS children ### if self.dns_resolution: emit_children = True @@ -336,6 +362,8 @@ async def _emit_event(self, event, **kwargs): for child_event in dns_child_events: self.queue_event(child_event) + log.debug(f"EMIT {event} 14") + except ValidationError as e: log.warning(f"Event validation failed with kwargs={kwargs}: {e}") log.trace(traceback.format_exc()) From b4eda6175a0c1369201d012105fd46536aaa16fc Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 20 Nov 2023 11:05:22 -0500 Subject: [PATCH 8/9] updated for file limit / better file extensions --- bbot/core/event/base.py | 11 ++--------- bbot/defaults.yml | 2 ++ bbot/modules/bucket_file_enum.py | 16 +++++++++++++++- bbot/scanner/scanner.py | 4 ++++ .../module_tests/test_module_bucket_file_enum.py | 8 ++++++++ 5 files changed, 31 insertions(+), 10 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index bc6b80451..57d2108a8 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -867,16 +867,9 @@ def sanitize_data(self, data): parsed_path_lower = str(self.parsed.path).lower() - url_extension_blacklist = [] - url_extension_httpx_only = [] scan = getattr(self, "scan", None) - if scan is not None: - _url_extension_blacklist = scan.config.get("url_extension_blacklist", []) - _url_extension_httpx_only = scan.config.get("url_extension_httpx_only", []) - if _url_extension_blacklist: - url_extension_blacklist = [e.lower() for e in _url_extension_blacklist] - if _url_extension_httpx_only: - url_extension_httpx_only = [e.lower() for e in _url_extension_httpx_only] + url_extension_blacklist = getattr(scan, "url_extension_blacklist", []) + url_extension_httpx_only = getattr(scan, "url_extension_httpx_only", []) extension = get_file_extension(parsed_path_lower) if extension: diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 5ed85b31d..1baa559ea 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -99,6 +99,8 @@ url_extension_blacklist: - woff - woff2 - ttf + - sass + - scss # audio - mp3 - m4a diff --git a/bbot/modules/bucket_file_enum.py b/bbot/modules/bucket_file_enum.py index 3375d8e10..7eb6926c0 100644 --- a/bbot/modules/bucket_file_enum.py +++ b/bbot/modules/bucket_file_enum.py @@ -13,8 +13,16 @@ class 
bucket_file_enum(BaseModule): "description": "Works in conjunction with the filedownload module to download files from open storage buckets. Currently supported cloud providers: AWS" } flags = ["passive", "safe", "cloud-enum"] + options = { + "file_limit": 50, + } + options_desc = {"file_limit": "Limit the number of files downloaded per bucket"} scope_distance_modifier = 2 + async def setup(self): + self.file_limit = self.config.get("file_limit", 50) + return True + async def handle_event(self, event): cloud_tags = (t for t in event.tags if t.startswith("cloud-")) if any(t.endswith("-amazon") or t.endswith("-digitalocean") for t in cloud_tags): @@ -22,6 +30,7 @@ async def handle_event(self, event): async def handle_aws(self, event): url = event.data["url"] + urls_emitted = 0 response = await self.helpers.request(url) status_code = getattr(response, "status_code", 0) if status_code == 200: @@ -31,4 +40,9 @@ async def handle_aws(self, event): keys = [key.text for key in root.findall(".//s3:Key", namespace)] for key in keys: bucket_file = url + "/" + key - self.emit_event(bucket_file, "URL_UNVERIFIED", source=event, tags="filedownload") + file_extension = self.helpers.get_file_extension(key) + if file_extension not in self.scan.url_extension_blacklist: + self.emit_event(bucket_file, "URL_UNVERIFIED", source=event, tags="filedownload") + urls_emitted += 1 + if urls_emitted >= self.file_limit: + return diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 84c623300..cf9851a3c 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -230,6 +230,10 @@ def __init__( ) self.scope_report_distance = int(self.config.get("scope_report_distance", 1)) + # url file extensions + self.url_extension_blacklist = set(e.lower() for e in self.config.get("url_extension_blacklist", [])) + self.url_extension_httpx_only = set(e.lower() for e in self.config.get("url_extension_httpx_only", [])) + # custom HTTP headers warning self.custom_http_headers = self.config.get("http_headers", {}) if self.custom_http_headers: diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py index 33f9a33c8..6361fe681 100644 --- a/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py +++ b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py @@ -4,6 +4,7 @@ class TestBucket_File_Enum(ModuleTestBase): targets = ["http://127.0.0.1:8888"] modules_overrides = ["bucket_file_enum", "filedownload", "httpx", "excavate"] + config_overrides = {"scope_report_distance": 5} open_bucket_url = "https://testbucket.s3.amazonaws.com/" open_bucket_body = """testbucket1000falseindex.html2023-05-22T23:04:38.000Z"4a2d2d114f3abf90f8bd127c1f25095a"5STANDARDtest.pdf2022-04-30T21:13:40.000Z"723b0018c2f5a7ef06a34f84f6fa97e4"388901STANDARD""" @@ -25,8 +26,15 @@ async def setup_before_prep(self, module_test): text=self.pdf_data, headers={"Content-Type": "application/pdf"}, ) + module_test.httpx_mock.add_response( + url=f"{self.open_bucket_url}test.css", + text="", + ) def check(self, module_test, events): download_dir = module_test.scan.home / "filedownload" files = list(download_dir.glob("*.pdf")) + assert any(e.type == "URL_UNVERIFIED" and e.data.endswith("test.pdf") for e in events) + assert not any(e.type == "URL_UNVERIFIED" and e.data.endswith("test.css") for e in events) assert any(f.name.endswith("test.pdf") for f in files), "Failed to download PDF file from open bucket" + assert not any(f.name.endswith("test.css") 
for f in files), "Unwanted CSS file was downloaded" From f67a5ce2add66ab5be8c6bd9dc227f8debf6c4fb Mon Sep 17 00:00:00 2001 From: TheTechromancer Date: Mon, 20 Nov 2023 12:19:18 -0500 Subject: [PATCH 9/9] removed debug statements --- bbot/scanner/manager.py | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py index 02c78a7e9..13d46669d 100644 --- a/bbot/scanner/manager.py +++ b/bbot/scanner/manager.py @@ -175,8 +175,6 @@ async def _emit_event(self, event, **kwargs): on_success_callback = kwargs.pop("on_success_callback", None) abort_if = kwargs.pop("abort_if", None) - log.debug(f"EMIT {event} 1") - # skip DNS resolution if it's disabled in the config and the event is a target and we don't have a blacklist skip_dns_resolution = (not self.dns_resolution) and "target" in event.tags and not self.scan.blacklist if skip_dns_resolution: @@ -200,8 +198,6 @@ async def _emit_event(self, event, **kwargs): for ip in ips: resolved_hosts.add(ip) - log.debug(f"EMIT {event} 2") - # kill runaway DNS chains dns_resolve_distance = getattr(event, "dns_resolve_distance", 0) if dns_resolve_distance >= self.scan.helpers.dns.max_dns_resolve_distance: @@ -210,8 +206,6 @@ async def _emit_event(self, event, **kwargs): ) dns_children = {} - log.debug(f"EMIT {event} 3") - if event.type in ("DNS_NAME", "IP_ADDRESS"): for tag in dns_tags: event.add_tag(tag) @@ -228,8 +222,6 @@ async def _emit_event(self, event, **kwargs): log.debug(f"Omitting due to blacklisted {reason}: {event}") return - log.debug(f"EMIT {event} 4") - # DNS_NAME --> DNS_NAME_UNRESOLVED if event.type == "DNS_NAME" and "unresolved" in event.tags and not "target" in event.tags: event.type = "DNS_NAME_UNRESOLVED" @@ -237,8 +229,6 @@ async def _emit_event(self, event, **kwargs): # Cloud tagging await self.scan.helpers.cloud.tag_event(event) - log.debug(f"EMIT {event} 5") - # Scope shepherding # here is where we make sure in-scope events are set to their proper scope distance if event.host and event_whitelisted: @@ -253,24 +243,18 @@ async def _emit_event(self, event, **kwargs): ) event.internal = True - log.debug(f"EMIT {event} 6") - # check for wildcards if event.scope_distance <= self.scan.scope_search_distance: if not "unresolved" in event.tags: if not self.scan.helpers.is_ip_type(event.host): await self.scan.helpers.dns.handle_wildcard_event(event, dns_children) - log.debug(f"EMIT {event} 7") - # For DNS_NAMEs, we've waited to do this until now, in case event.data changed during handle_wildcard_event() if event.type == "DNS_NAME": acceptable = self._event_precheck(event) if not acceptable: return - log.debug(f"EMIT {event} 8") - # if we discovered something interesting from an internal event, # make sure we preserve its chain of parents source = event.source @@ -283,8 +267,6 @@ async def _emit_event(self, event, **kwargs): log.debug(f"Re-queuing internal event {source} with parent {event}") self.queue_event(source) - log.debug(f"EMIT {event} 9") - # now that the event is properly tagged, we can finally make decisions about it abort_result = False if callable(abort_if): @@ -298,20 +280,14 @@ async def _emit_event(self, event, **kwargs): log.debug(msg) return - log.debug(f"EMIT {event} 10") - # run success callback before distributing event (so it can add tags, etc.) 
if callable(on_success_callback): async with self.scan._acatch(context=on_success_callback): await self.scan.helpers.execute_sync_or_async(on_success_callback, event) - log.debug(f"EMIT {event} 11") - await self.distribute_event(event) event_distributed = True - log.debug(f"EMIT {event} 12") - # speculate DNS_NAMES and IP_ADDRESSes from other event types source_event = event if ( @@ -329,8 +305,6 @@ async def _emit_event(self, event, **kwargs): source_event.add_tag("target") self.queue_event(source_event) - log.debug(f"EMIT {event} 13") - ### Emit DNS children ### if self.dns_resolution: emit_children = True @@ -362,8 +336,6 @@ async def _emit_event(self, event, **kwargs): for child_event in dns_child_events: self.queue_event(child_event) - log.debug(f"EMIT {event} 14") - except ValidationError as e: log.warning(f"Event validation failed with kwargs={kwargs}: {e}") log.trace(traceback.format_exc())
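
For reference, the core technique introduced by the bucket_file_enum module in this patch series — fetch an open bucket's root URL, parse the S3 ListBucketResult XML for Key elements, skip blacklisted file extensions, and stop at a per-bucket file limit — can be seen in isolation in the minimal standalone sketch below. This is not BBOT code: the stdlib urllib call, the blacklist contents, and the helper name are illustrative assumptions; inside BBOT the module uses self.helpers.request(), reads the blacklist from the scan config, and emits URL_UNVERIFIED events instead of returning a list.

# Standalone sketch (not part of the patches above) of the bucket listing technique.
# The blacklist below and the bucket URL in the usage example are illustrative only.
import urllib.request
import xml.etree.ElementTree as ET

S3_NS = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"}
EXTENSION_BLACKLIST = {"css", "png", "jpg", "woff", "woff2", "ttf", "mp3"}


def list_open_bucket(bucket_url, file_limit=50):
    """Return up to file_limit file URLs from a publicly listable S3 bucket."""
    # Fetch the bucket's root listing (an XML ListBucketResult document)
    with urllib.request.urlopen(bucket_url) as response:
        content = response.read()
    root = ET.fromstring(content)
    file_urls = []
    for key_elem in root.findall(".//s3:Key", S3_NS):
        key = key_elem.text
        # Mirror the module's extension filtering: skip uninteresting static assets
        extension = key.rsplit(".", 1)[-1].lower() if "." in key else ""
        if extension in EXTENSION_BLACKLIST:
            continue
        file_urls.append(bucket_url.rstrip("/") + "/" + key)
        if len(file_urls) >= file_limit:
            break
    return file_urls


# Example usage (hypothetical bucket name):
# print(list_open_bucket("https://testbucket.s3.amazonaws.com/"))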