diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f02837b14..35f83a893 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -46,7 +46,7 @@ jobs:
poetry install
- name: Run tests
run: |
- poetry run pytest --exitfirst --reruns 2 -o timeout_func_only=true --timeout 600 --disable-warnings --log-cli-level=DEBUG --cov-config=bbot/test/coverage.cfg --cov-report xml:cov.xml --cov=bbot .
+ poetry run pytest --exitfirst --reruns 2 -o timeout_func_only=true --timeout 1200 --disable-warnings --log-cli-level=DEBUG --cov-config=bbot/test/coverage.cfg --cov-report xml:cov.xml --cov=bbot .
- name: Upload Code Coverage
uses: codecov/codecov-action@v3
with:
diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index 92905bd01..57d2108a8 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -867,16 +867,9 @@ def sanitize_data(self, data):
parsed_path_lower = str(self.parsed.path).lower()
- url_extension_blacklist = []
- url_extension_httpx_only = []
scan = getattr(self, "scan", None)
- if scan is not None:
- _url_extension_blacklist = scan.config.get("url_extension_blacklist", [])
- _url_extension_httpx_only = scan.config.get("url_extension_httpx_only", [])
- if _url_extension_blacklist:
- url_extension_blacklist = [e.lower() for e in _url_extension_blacklist]
- if _url_extension_httpx_only:
- url_extension_httpx_only = [e.lower() for e in _url_extension_httpx_only]
+ url_extension_blacklist = getattr(scan, "url_extension_blacklist", [])
+ url_extension_httpx_only = getattr(scan, "url_extension_httpx_only", [])
extension = get_file_extension(parsed_path_lower)
if extension:
@@ -934,6 +927,7 @@ class STORAGE_BUCKET(DictEvent, URL_UNVERIFIED):
class _data_validator(BaseModel):
name: str
url: str
+ _validate_url = field_validator("url")(validators.validate_url)
def _words(self):
return self.data["name"]
@@ -1009,6 +1003,7 @@ class _data_validator(BaseModel):
severity: str
description: str
url: Optional[str] = None
+ _validate_url = field_validator("url")(validators.validate_url)
_validate_host = field_validator("host")(validators.validate_host)
_validate_severity = field_validator("severity")(validators.validate_severity)
@@ -1023,6 +1018,7 @@ class _data_validator(BaseModel):
host: str
description: str
url: Optional[str] = None
+ _validate_url = field_validator("url")(validators.validate_url)
_validate_host = field_validator("host")(validators.validate_host)
def _pretty_string(self):
@@ -1034,6 +1030,7 @@ class _data_validator(BaseModel):
host: str
technology: str
url: Optional[str] = None
+ _validate_url = field_validator("url")(validators.validate_url)
_validate_host = field_validator("host")(validators.validate_host)
def _data_id(self):
@@ -1050,6 +1047,7 @@ class _data_validator(BaseModel):
host: str
vhost: str
url: Optional[str] = None
+ _validate_url = field_validator("url")(validators.validate_url)
_validate_host = field_validator("host")(validators.validate_host)
def _pretty_string(self):
diff --git a/bbot/defaults.yml b/bbot/defaults.yml
index 5ed85b31d..1baa559ea 100644
--- a/bbot/defaults.yml
+++ b/bbot/defaults.yml
@@ -99,6 +99,8 @@ url_extension_blacklist:
- woff
- woff2
- ttf
+ - sass
+ - scss
# audio
- mp3
- m4a
diff --git a/bbot/modules/bucket_digitalocean.py b/bbot/modules/bucket_digitalocean.py
index 5c92692f7..c467fde6e 100644
--- a/bbot/modules/bucket_digitalocean.py
+++ b/bbot/modules/bucket_digitalocean.py
@@ -17,4 +17,4 @@ class bucket_digitalocean(bucket_template):
regions = ["ams3", "fra1", "nyc3", "sfo2", "sfo3", "sgp1"]
def build_url(self, bucket_name, base_domain, region):
- return f"https://{bucket_name}.{region}.{base_domain}"
+ return f"https://{bucket_name}.{region}.{base_domain}/"
diff --git a/bbot/modules/bucket_file_enum.py b/bbot/modules/bucket_file_enum.py
new file mode 100644
index 000000000..7eb6926c0
--- /dev/null
+++ b/bbot/modules/bucket_file_enum.py
@@ -0,0 +1,48 @@
+from bbot.modules.base import BaseModule
+import xml.etree.ElementTree as ET
+
+
+class bucket_file_enum(BaseModule):
+ """
+ Enumerate files in a public bucket
+ """
+
+ watched_events = ["STORAGE_BUCKET"]
+ produced_events = ["URL_UNVERIFIED"]
+ meta = {
+        "description": "Works in conjunction with the filedownload module to download files from open storage buckets. Currently supported cloud providers: AWS, DigitalOcean"
+ }
+ flags = ["passive", "safe", "cloud-enum"]
+ options = {
+ "file_limit": 50,
+ }
+ options_desc = {"file_limit": "Limit the number of files downloaded per bucket"}
+ scope_distance_modifier = 2
+
+ async def setup(self):
+ self.file_limit = self.config.get("file_limit", 50)
+ return True
+
+ async def handle_event(self, event):
+ cloud_tags = (t for t in event.tags if t.startswith("cloud-"))
+ if any(t.endswith("-amazon") or t.endswith("-digitalocean") for t in cloud_tags):
+ await self.handle_aws(event)
+
+ async def handle_aws(self, event):
+ url = event.data["url"]
+ urls_emitted = 0
+ response = await self.helpers.request(url)
+ status_code = getattr(response, "status_code", 0)
+ if status_code == 200:
+ content = response.text
+ root = ET.fromstring(content)
+ namespace = {"s3": "http://s3.amazonaws.com/doc/2006-03-01/"}
+ keys = [key.text for key in root.findall(".//s3:Key", namespace)]
+ for key in keys:
+ bucket_file = url + "/" + key
+ file_extension = self.helpers.get_file_extension(key)
+ if file_extension not in self.scan.url_extension_blacklist:
+ self.emit_event(bucket_file, "URL_UNVERIFIED", source=event, tags="filedownload")
+ urls_emitted += 1
+ if urls_emitted >= self.file_limit:
+ return
diff --git a/bbot/modules/filedownload.py b/bbot/modules/filedownload.py
index ad4128e90..4b43e2834 100644
--- a/bbot/modules/filedownload.py
+++ b/bbot/modules/filedownload.py
@@ -77,7 +77,7 @@ class filedownload(BaseModule):
"max_filesize": "Cancel download if filesize is greater than this size",
}
- scope_distance_modifier = 1
+ scope_distance_modifier = 3
async def setup(self):
self.extensions = list(set([e.lower().strip(".") for e in self.options.get("extensions", [])]))
@@ -101,8 +101,11 @@ async def filter_event(self, event):
# accept file download requests from other modules
if "filedownload" in event.tags:
return True
- if self.hash_event(event) in self.urls_downloaded:
- return False, f"Already processed {event}"
+ else:
+ if event.scope_distance > 1:
+ return False, f"{event} not within scope distance"
+ elif self.hash_event(event) in self.urls_downloaded:
+ return False, f"Already processed {event}"
return True
def hash_event(self, event):
@@ -113,7 +116,9 @@ def hash_event(self, event):
async def handle_event(self, event):
if event.type == "URL_UNVERIFIED":
url_lower = event.data.lower()
- if any(url_lower.endswith(f".{e}") for e in self.extensions):
+ extension_matches = any(url_lower.endswith(f".{e}") for e in self.extensions)
+ filedownload_requested = "filedownload" in event.tags
+ if extension_matches or filedownload_requested:
await self.download_file(event.data)
elif event.type == "HTTP_RESPONSE":
content_type = event.data["header"].get("content_type", "")
diff --git a/bbot/modules/templates/bucket.py b/bbot/modules/templates/bucket.py
index 2456d1449..eef8f5bee 100644
--- a/bbot/modules/templates/bucket.py
+++ b/bbot/modules/templates/bucket.py
@@ -126,7 +126,7 @@ def valid_bucket_name(self, bucket_name):
return False
def build_url(self, bucket_name, base_domain, region):
- return f"https://{bucket_name}.{base_domain}"
+ return f"https://{bucket_name}.{base_domain}/"
def gen_tags_exists(self, response):
return set()
diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py
index 84c623300..cf9851a3c 100644
--- a/bbot/scanner/scanner.py
+++ b/bbot/scanner/scanner.py
@@ -230,6 +230,10 @@ def __init__(
)
self.scope_report_distance = int(self.config.get("scope_report_distance", 1))
+ # url file extensions
+ self.url_extension_blacklist = set(e.lower() for e in self.config.get("url_extension_blacklist", []))
+ self.url_extension_httpx_only = set(e.lower() for e in self.config.get("url_extension_httpx_only", []))
+
# custom HTTP headers warning
self.custom_http_headers = self.config.get("http_headers", {})
if self.custom_http_headers:
diff --git a/bbot/test/conftest.py b/bbot/test/conftest.py
index 38069366b..4dcf8ed21 100644
--- a/bbot/test/conftest.py
+++ b/bbot/test/conftest.py
@@ -25,7 +25,7 @@ def pytest_sessionfinish(session, exitstatus):
@pytest.fixture
def non_mocked_hosts() -> list:
- return ["127.0.0.1", "localhost", "githubusercontent.com"] + interactsh_servers
+ return ["127.0.0.1", "localhost", "raw.githubusercontent.com"] + interactsh_servers
@pytest.fixture
diff --git a/bbot/test/test_step_1/test_cloud_helpers.py b/bbot/test/test_step_1/test_cloud_helpers.py
index 7dea8cb30..b42da11a7 100644
--- a/bbot/test/test_step_1/test_cloud_helpers.py
+++ b/bbot/test/test_step_1/test_cloud_helpers.py
@@ -9,7 +9,6 @@ async def test_cloud_helpers(bbot_scanner, bbot_config):
for provider_name in provider_names:
assert provider_name in scan1.helpers.cloud.providers.providers
- log.critical(scan1.helpers.cloud.providers.providers)
for p in scan1.helpers.cloud.providers.providers.values():
print(f"{p.name}: {p.domains} / {p.ranges}")
amazon_ranges = list(scan1.helpers.cloud["amazon"].ranges)
@@ -30,12 +29,10 @@ async def test_cloud_helpers(bbot_scanner, bbot_config):
other_event3._resolved_hosts = {"asdf.amazonaws.com"}
for event in (ip_event, aws_event1, aws_event2, aws_event4, other_event2, other_event3):
- log.critical(event)
await scan1.helpers.cloud.tag_event(event)
assert "cloud-amazon" in event.tags, f"{event} was not properly cloud-tagged"
for event in (aws_event3, other_event1):
- log.critical(event)
await scan1.helpers.cloud.tag_event(event)
assert "cloud-amazon" not in event.tags, f"{event} was improperly cloud-tagged"
assert not any(
diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py b/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py
index 0f3df7cf9..6d58dd36f 100644
--- a/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py
+++ b/bbot/test/test_step_2/module_tests/test_module_bucket_amazon.py
@@ -37,9 +37,9 @@ def modules_overrides(self):
return ["excavate", "speculate", "httpx", self.module_name]
def url_setup(self):
- self.url_1 = f"https://{self.random_bucket_1}"
- self.url_2 = f"https://{self.random_bucket_2}"
- self.url_3 = f"https://{self.random_bucket_3}"
+ self.url_1 = f"https://{self.random_bucket_1}/"
+ self.url_2 = f"https://{self.random_bucket_2}/"
+ self.url_3 = f"https://{self.random_bucket_3}/"
def bucket_setup(self):
self.url_setup()
@@ -83,14 +83,14 @@ def check(self, module_test, events):
url = e.data.get("url", "")
assert self.random_bucket_2 in url
assert not self.random_bucket_1 in url
- assert not f"{self.random_bucket_3}" in url
+ assert not self.random_bucket_3 in url
# make sure bucket mutations were found
assert any(
e.type == "STORAGE_BUCKET"
and str(e.module) == self.module_name
and f"{random_bucket_name_3}" in e.data["url"]
for e in events
- ), f'bucket (dev mutation) not found for module "{self.module_name}"'
+ ), f'bucket (dev mutation: {self.random_bucket_3}) not found for module "{self.module_name}"'
class TestBucket_Amazon(Bucket_Amazon_Base):
diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py
new file mode 100644
index 000000000..6361fe681
--- /dev/null
+++ b/bbot/test/test_step_2/module_tests/test_module_bucket_file_enum.py
@@ -0,0 +1,40 @@
+from .base import ModuleTestBase
+
+
+class TestBucket_File_Enum(ModuleTestBase):
+ targets = ["http://127.0.0.1:8888"]
+ modules_overrides = ["bucket_file_enum", "filedownload", "httpx", "excavate"]
+ config_overrides = {"scope_report_distance": 5}
+
+ open_bucket_url = "https://testbucket.s3.amazonaws.com/"
+ open_bucket_body = """
flask
" in report_content