diff --git a/README.md b/README.md index 9ceaf336d9..b979a1b6ee 100644 --- a/README.md +++ b/README.md @@ -317,6 +317,11 @@ Targets can be any of the following: - `IP_RANGE` (`1.2.3.0/24`) - `OPEN_TCP_PORT` (`192.168.0.1:80`) - `URL` (`https://www.evilcorp.com`) +- `EMAIL_ADDRESS` (`bob@evilcorp.com`) +- `ORG_STUB` (`ORG:evilcorp`) +- `USER_STUB` (`USER:bobsmith`) +- `FILESYSTEM` (`FILESYSTEM:/tmp/asdf`) +- `MOBILE_APP` (`MOBILE_APP:https://play.google.com/store/apps/details?id=com.evilcorp.app`) For more information, see [Targets](https://www.blacklanternsecurity.com/bbot/Stable/scanning/#targets-t). To learn how BBOT handles scope, see [Scope](https://www.blacklanternsecurity.com/bbot/Stable/scanning/#scope). diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 74ec0b6961..3c22364d29 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -14,8 +14,8 @@ from copy import copy, deepcopy from contextlib import suppress from radixtarget import RadixTarget -from urllib.parse import urljoin, parse_qs from pydantic import BaseModel, field_validator +from urllib.parse import urlparse, urljoin, parse_qs from .helpers import * @@ -1646,6 +1646,27 @@ class RAW_DNS_RECORD(DictHostEvent, DnsEvent): class MOBILE_APP(DictEvent): _always_emit = True + def _sanitize_data(self, data): + if isinstance(data, str): + data = {"url": data} + if "url" not in data: + raise ValidationError("url is required for MOBILE_APP events") + url = data["url"] + # parse URL + try: + self.parsed_url = urlparse(url) + except Exception as e: + raise ValidationError(f"Error parsing URL {url}: {e}") + if not "id" in data: + # extract "id" getparam + params = parse_qs(self.parsed_url.query) + try: + _id = params["id"][0] + except Exception: + raise ValidationError("id is required for MOBILE_APP events") + data["id"] = _id + return data + def _pretty_string(self): return self.data["url"] diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index ced61925ce..688f9f599c 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -562,13 +562,12 @@ def is_port(p): return p and p.isdigit() and 0 <= int(p) <= 65535 -def is_dns_name(d, include_local=True): +def is_dns_name(d): """ Determines if the given string is a valid DNS name. Args: d (str): The string to be checked. - include_local (bool): Consider local hostnames to be valid (hostnames without periods) Returns: bool: True if the string is a valid DNS name, False otherwise. @@ -578,17 +577,12 @@ def is_dns_name(d, include_local=True): True >>> is_dns_name('localhost') True - >>> is_dns_name('localhost', include_local=False) - False >>> is_dns_name('192.168.1.1') False """ if is_ip(d): return False d = smart_decode(d) - if include_local: - if bbot_regexes.hostname_regex.match(d): - return True if bbot_regexes.dns_name_validation_regex.match(d): return True return False diff --git a/bbot/core/helpers/regexes.py b/bbot/core/helpers/regexes.py index 4e3c1e4250..8e162a3262 100644 --- a/bbot/core/helpers/regexes.py +++ b/bbot/core/helpers/regexes.py @@ -39,14 +39,10 @@ ip_range_regexes = [re.compile(r, re.I) for r in _ip_range_regexes] # dns names with periods -_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.)+(?:[xX][nN]--)?[^\W_]{1,63}\.?" +_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.?)+(?:[xX][nN]--)?[^\W_]{1,63}\.?" dns_name_extraction_regex = re.compile(_dns_name_regex, re.I) dns_name_validation_regex = re.compile(r"^" + _dns_name_regex + r"$", re.I) -# dns names without periods -_hostname_regex = r"(?!\w*\.\w+)\w(?:[\w-]{0,100}\w)?" -hostname_regex = re.compile(r"^" + _hostname_regex + r"$", re.I) - _email_regex = r"(?:[^\W_][\w\-\.\+']{,100})@" + _dns_name_regex email_regex = re.compile(_email_regex, re.I) @@ -61,14 +57,12 @@ _open_port_regexes = ( _dns_name_regex + r":[0-9]{1,5}", - _hostname_regex + r":[0-9]{1,5}", r"\[" + _ipv6_regex + r"\]:[0-9]{1,5}", ) open_port_regexes = [re.compile(r, re.I) for r in _open_port_regexes] _url_regexes = ( r"https?://" + _dns_name_regex + r"(?::[0-9]{1,5})?(?:(?:/|\?).*)?", - r"https?://" + _hostname_regex + r"(?::[0-9]{1,5})?(?:(?:/|\?).*)?", r"https?://\[" + _ipv6_regex + r"\](?::[0-9]{1,5})?(?:(?:/|\?).*)?", ) url_regexes = [re.compile(r, re.I) for r in _url_regexes] @@ -83,10 +77,7 @@ for k, regexes in ( ( "DNS_NAME", - ( - r"^" + _dns_name_regex + r"$", - r"^" + _hostname_regex + r"$", - ), + (r"^" + _dns_name_regex + r"$",), ), ( "EMAIL_ADDRESS", @@ -170,7 +161,7 @@ button_tag_regex2 = re.compile( r"]*?value=[\"\']?([\-%\._=+\/\w]*)[\"\']?[^>]*?name=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>" ) -tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?([^\s\'\"\>]+)[\"\']?[^>]*>") +tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?(?!mailto:)([^\s\'\"\>]+)[\"\']?[^>]*>") valid_netloc = r"[^\s!@#$%^&()=/?\\'\";~`<>]+" diff --git a/bbot/modules/github_org.py b/bbot/modules/github_org.py index 5b8571874c..5417a4e2d9 100644 --- a/bbot/modules/github_org.py +++ b/bbot/modules/github_org.py @@ -206,11 +206,7 @@ async def validate_org(self, org): for k, v in json.items(): if ( isinstance(v, str) - and ( - self.helpers.is_dns_name(v, include_local=False) - or self.helpers.is_url(v) - or self.helpers.is_email(v) - ) + and (self.helpers.is_dns_name(v) and "." in v or self.helpers.is_url(v) or self.helpers.is_email(v)) and self.scan.in_scope(v) ): self.verbose(f'Found in-scope key "{k}": "{v}" for {org}, it appears to be in-scope') diff --git a/bbot/modules/internal/excavate.py b/bbot/modules/internal/excavate.py index 7db6db2f97..f7a6e3cc66 100644 --- a/bbot/modules/internal/excavate.py +++ b/bbot/modules/internal/excavate.py @@ -613,7 +613,9 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte if self.excavate.in_bl(parameter_name) is False: parsed_url = urlparse(url) if not parsed_url.hostname: - self.excavate.warning(f"Error Parsing reconstructed URL [{url}] during parameter extraction, missing hostname") + self.excavate.warning( + f"Error Parsing reconstructed URL [{url}] during parameter extraction, missing hostname" + ) continue description = f"HTTP Extracted Parameter [{parameter_name}] ({parameterExtractorSubModule.name} Submodule)" data = { @@ -814,7 +816,7 @@ class URLExtractor(ExcavateRule): """ ), } - full_url_regex = re.compile(r"(https?)://((?:\w|\d)(?:[\d\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]*[-\w\.]+)*/?)") + full_url_regex = re.compile(r"(https?)://(\w(?:[\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]*[-\w\.]+)*/?)") full_url_regex_strict = re.compile(r"^(https?):\/\/([\w.-]+)(?::\d{1,5})?(\/[\w\/\.-]*)?(\?[^\s]+)?$") tag_attribute_regex = bbot_regexes.tag_attribute_regex diff --git a/bbot/modules/trufflehog.py b/bbot/modules/trufflehog.py index 8441c73648..7949b370a5 100644 --- a/bbot/modules/trufflehog.py +++ b/bbot/modules/trufflehog.py @@ -13,7 +13,7 @@ class trufflehog(BaseModule): } options = { - "version": "3.87.0", + "version": "3.87.2", "config": "", "only_verified": True, "concurrency": 8, diff --git a/bbot/scanner/target.py b/bbot/scanner/target.py index f86b0de15b..bdd9edd107 100644 --- a/bbot/scanner/target.py +++ b/bbot/scanner/target.py @@ -95,9 +95,9 @@ def add(self, targets): else: event = self.make_event(target) if event: + self.inputs.add(target) _events = [event] for event in _events: - self.inputs.add(event.data) events.add(event) # sort by host size to ensure consistency @@ -140,6 +140,20 @@ def handle_username(self, match): return [username_event] return [] + @special_target_type(r"^(?:FILESYSTEM|FILE|FOLDER|DIR|PATH):(.*)") + def handle_filesystem(self, match): + filesystem_event = self.make_event({"path": match.group(1)}, event_type="FILESYSTEM") + if filesystem_event: + return [filesystem_event] + return [] + + @special_target_type(r"^(?:MOBILE_APP|APK|IPA|APP):(.*)") + def handle_mobile_app(self, match): + mobile_app_event = self.make_event({"url": match.group(1)}, event_type="MOBILE_APP") + if mobile_app_event: + return [mobile_app_event] + return [] + def get(self, event, single=True, **kwargs): results = super().get(event, **kwargs) if results and single: diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 195f08ea89..c4ecfbd161 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -979,6 +979,45 @@ def test_event_magic(): zip_file.unlink() +@pytest.mark.asyncio +async def test_mobile_app(): + scan = Scanner() + with pytest.raises(ValidationError): + scan.make_event("com.evilcorp.app", "MOBILE_APP", parent=scan.root_event) + with pytest.raises(ValidationError): + scan.make_event({"id": "com.evilcorp.app"}, "MOBILE_APP", parent=scan.root_event) + with pytest.raises(ValidationError): + scan.make_event({"url": "https://play.google.com/store/apps/details"}, "MOBILE_APP", parent=scan.root_event) + mobile_app = scan.make_event( + {"url": "https://play.google.com/store/apps/details?id=com.evilcorp.app"}, "MOBILE_APP", parent=scan.root_event + ) + assert sorted(mobile_app.data.items()) == [ + ("id", "com.evilcorp.app"), + ("url", "https://play.google.com/store/apps/details?id=com.evilcorp.app"), + ] + + scan = Scanner("MOBILE_APP:https://play.google.com/store/apps/details?id=com.evilcorp.app") + events = [e async for e in scan.async_start()] + assert len(events) == 3 + mobile_app_event = [e for e in events if e.type == "MOBILE_APP"][0] + assert mobile_app_event.type == "MOBILE_APP" + assert sorted(mobile_app_event.data.items()) == [ + ("id", "com.evilcorp.app"), + ("url", "https://play.google.com/store/apps/details?id=com.evilcorp.app"), + ] + + +@pytest.mark.asyncio +async def test_filesystem(): + scan = Scanner("FILESYSTEM:/tmp/asdf") + events = [e async for e in scan.async_start()] + assert len(events) == 3 + filesystem_events = [e for e in events if e.type == "FILESYSTEM"] + assert len(filesystem_events) == 1 + assert filesystem_events[0].type == "FILESYSTEM" + assert filesystem_events[0].data == {"path": "/tmp/asdf"} + + def test_event_hashing(): scan = Scanner("example.com") url_event = scan.make_event("https://api.example.com/", "URL_UNVERIFIED", parent=scan.root_event) diff --git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py index 329994c748..12fb15278f 100644 --- a/bbot/test/test_step_1/test_helpers.py +++ b/bbot/test/test_step_1/test_helpers.py @@ -122,7 +122,7 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_httpserver): assert not helpers.is_dns_name("evilcorp.com:80") assert not helpers.is_dns_name("http://evilcorp.com:80") assert helpers.is_dns_name("evilcorp") - assert not helpers.is_dns_name("evilcorp", include_local=False) + assert helpers.is_dns_name("evilcorp.") assert helpers.is_dns_name("ドメイン.テスト") assert not helpers.is_dns_name("127.0.0.1") assert not helpers.is_dns_name("dead::beef") diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index 5b1564f12c..be31b38673 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -272,13 +272,13 @@ def test_preset_scope(): } assert preset_whitelist_baked.to_dict(include_target=True) == { "target": ["evilcorp.org"], - "whitelist": ["1.2.3.0/24", "http://evilcorp.net/"], + "whitelist": ["1.2.3.4/24", "http://evilcorp.net"], "blacklist": ["bob@evilcorp.co.uk", "evilcorp.co.uk:443"], "config": {"modules": {"secretsdb": {"api_key": "deadbeef", "otherthing": "asdf"}}}, } assert preset_whitelist_baked.to_dict(include_target=True, redact_secrets=True) == { "target": ["evilcorp.org"], - "whitelist": ["1.2.3.0/24", "http://evilcorp.net/"], + "whitelist": ["1.2.3.4/24", "http://evilcorp.net"], "blacklist": ["bob@evilcorp.co.uk", "evilcorp.co.uk:443"], "config": {"modules": {"secretsdb": {"otherthing": "asdf"}}}, } diff --git a/bbot/test/test_step_1/test_regexes.py b/bbot/test/test_step_1/test_regexes.py index c8cb6d845d..dbd8dce2b0 100644 --- a/bbot/test/test_step_1/test_regexes.py +++ b/bbot/test/test_step_1/test_regexes.py @@ -267,7 +267,6 @@ def test_url_regexes(): "http:///evilcorp.com", "http:// evilcorp.com", "http://evilcorp com", - "http://evilcorp.", "http://.com", "evilcorp.com", "http://ex..ample.com", @@ -288,6 +287,7 @@ def test_url_regexes(): good_urls = [ "https://evilcorp.com", + "http://evilcorp.", "https://asdf.www.evilcorp.com", "https://asdf.www-test.evilcorp.com", "https://a.www-test.evilcorp.c", diff --git a/bbot/test/test_step_2/module_tests/test_module_excavate.py b/bbot/test/test_step_2/module_tests/test_module_excavate.py index 3795260452..76975b69f2 100644 --- a/bbot/test/test_step_2/module_tests/test_module_excavate.py +++ b/bbot/test/test_step_2/module_tests/test_module_excavate.py @@ -29,6 +29,7 @@ async def setup_before_prep(self, module_test): # these ones should + Help """ expect_args = {"method": "GET", "uri": "/"} respond_args = {"response_data": response_data} @@ -1248,11 +1249,6 @@ def check(self, module_test, events): ), f"URL extracted from extractous text is incorrect, got {url_events}" -from bbot.modules.base import BaseModule -from .base import ModuleTestBase, tempwordlist -from bbot.modules.internal.excavate import ExcavateRule - - class TestExcavate(ModuleTestBase): targets = ["http://127.0.0.1:8888/", "test.notreal", "http://127.0.0.1:8888/subdir/links.html"] modules_overrides = ["excavate", "httpx"] @@ -1380,3 +1376,29 @@ def check(self, module_test, events): assert found_first_cookie is True assert found_second_cookie is False assert found_third_cookie is False + +class TestExcavateBadURLs(ModuleTestBase): + targets = ["http://127.0.0.1:8888/"] + modules_overrides = ["excavate", "httpx", "hunt"] + config_overrides = {"interactsh_disable": True, "scope": {"report_distance": 10}} + + bad_url_data = """ +Help +Help +""" + + async def setup_after_prep(self, module_test): + module_test.set_expect_requests({"uri": "/"}, {"response_data": self.bad_url_data}) + + def check(self, module_test, events): + log_file = module_test.scan.home / "debug.log" + log_text = log_file.read_text() + # make sure our logging is working + assert "Setting scan status to STARTING" in log_text + # make sure we don't have any URL validation errors + assert "Error Parsing reconstructed URL" not in log_text + assert "Error sanitizing event data" not in log_text + + url_events = [e for e in events if e.type == "URL_UNVERIFIED"] + assert sorted([e.data for e in url_events]) == sorted(["https://ssl/", "http://127.0.0.1:8888/"]) + diff --git a/docs/scanning/index.md b/docs/scanning/index.md index 357dc5294d..b947319c45 100644 --- a/docs/scanning/index.md +++ b/docs/scanning/index.md @@ -22,6 +22,11 @@ Targets declare what's in-scope, and seed a scan with initial data. BBOT accepts - `IP_RANGE` (`1.2.3.0/24`) - `OPEN_TCP_PORT` (`192.168.0.1:80`) - `URL` (`https://www.evilcorp.com`) +- `EMAIL_ADDRESS` (`bob@evilcorp.com`) +- `ORG_STUB` (`ORG:evilcorp`) +- `USER_STUB` (`USER:bobsmith`) +- `FILESYSTEM` (`FILESYSTEM:/tmp/asdf`) +- `MOBILE_APP` (`MOBILE_APP:https://play.google.com/store/apps/details?id=com.evilcorp.app`) Note that BBOT only discriminates down to the host level. This means, for example, if you specify a URL `https://www.evilcorp.com` as the target, the scan will be *seeded* with that URL, but the scope of the scan will be the entire host, `www.evilcorp.com`. Other ports/URLs on that same host may also be scanned. diff --git a/poetry.lock b/poetry.lock index d618c103bb..6f546adf77 100644 --- a/poetry.lock +++ b/poetry.lock @@ -385,20 +385,20 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [[package]] name = "cloudcheck" -version = "6.0.0.686" +version = "7.0.33" description = "Check whether an IP address belongs to a cloud provider" category = "main" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "cloudcheck-6.0.0.686-py3-none-any.whl", hash = "sha256:bd2494ffc5e3ee2c6ab6c13bf2842407da02a2ff27edd9788d73e13a276f1c39"}, - {file = "cloudcheck-6.0.0.686.tar.gz", hash = "sha256:70ef41a1e03dcc35093322d7bb1cd6636ea945bdbeceda15e9120013023ee5cb"}, + {file = "cloudcheck-7.0.33-py3-none-any.whl", hash = "sha256:005d6888b3b4526888f98f9514487e801d521d756b48c7ff55daa9a638fda570"}, + {file = "cloudcheck-7.0.33.tar.gz", hash = "sha256:36699d3868ffcdd3ac36e761e3c074a69d32120c787013d36820f6766ab73543"}, ] [package.dependencies] httpx = ">=0.26,<0.29" pydantic = ">=2.4.2,<3.0.0" -radixtarget = ">=2.0.0.32,<3.0.0.0" +radixtarget = ">=3.0.13,<4.0.0" regex = ">=2024.4.16,<2025.0.0" [[package]] @@ -2450,14 +2450,14 @@ cffi = {version = "*", markers = "implementation_name == \"pypy\""} [[package]] name = "radixtarget" -version = "2.0.0.58" +version = "3.0.15" description = "Check whether an IP address belongs to a cloud provider" category = "main" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "radixtarget-2.0.0.58-py3-none-any.whl", hash = "sha256:da1feb277012a115c26b370f5e2102dd31ff8745294fddc75f2d2664cc8820ad"}, - {file = "radixtarget-2.0.0.58.tar.gz", hash = "sha256:2d909608503698495b135cf1c2446c23c1d6f9dc4dfc6e6ed5517fcb5f7ee46e"}, + {file = "radixtarget-3.0.15-py3-none-any.whl", hash = "sha256:1e1d0dd3e8742ffcfc42084eb238f31f6785626b876ab63a9f28a29e97bd3bb0"}, + {file = "radixtarget-3.0.15.tar.gz", hash = "sha256:dedfad3aea1e973f261b7bc0d8936423f59ae4d082648fd496c6cdfdfa069fea"}, ] [[package]] @@ -3317,3 +3317,4 @@ type = ["pytest-mypy"] lock-version = "2.0" python-versions = "^3.9" content-hash = "5b7316c4fbfddce5a1d70a874070d8edc83320ff73fea32801fecc0acffc0d17" + diff --git a/pyproject.toml b/pyproject.toml index ed45800a32..444891ec9d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,8 +55,8 @@ pyzmq = "^26.0.3" httpx = "^0.27.0" pyahocorasick = "^2.1.0" puremagic = "^1.28" -cloudcheck = "^6.0.0.602" -radixtarget = "^2.0.0.50" +radixtarget = "^3.0.13" +cloudcheck = "^7.0.12" orjson = "^3.10.12" [tool.poetry.group.dev.dependencies]