Skip to content

Commit

Permalink
Merge branch 'dev' into lightfuzz
Browse files Browse the repository at this point in the history
  • Loading branch information
liquidsec authored Dec 20, 2024
2 parents 8808fd3 + 798670d commit 7487d7b
Show file tree
Hide file tree
Showing 16 changed files with 137 additions and 47 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,11 @@ Targets can be any of the following:
- `IP_RANGE` (`1.2.3.0/24`)
- `OPEN_TCP_PORT` (`192.168.0.1:80`)
- `URL` (`https://www.evilcorp.com`)
- `EMAIL_ADDRESS` (`[email protected]`)
- `ORG_STUB` (`ORG:evilcorp`)
- `USER_STUB` (`USER:bobsmith`)
- `FILESYSTEM` (`FILESYSTEM:/tmp/asdf`)
- `MOBILE_APP` (`MOBILE_APP:https://play.google.com/store/apps/details?id=com.evilcorp.app`)

For more information, see [Targets](https://www.blacklanternsecurity.com/bbot/Stable/scanning/#targets-t). To learn how BBOT handles scope, see [Scope](https://www.blacklanternsecurity.com/bbot/Stable/scanning/#scope).

Expand Down
23 changes: 22 additions & 1 deletion bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from copy import copy, deepcopy
from contextlib import suppress
from radixtarget import RadixTarget
from urllib.parse import urljoin, parse_qs
from pydantic import BaseModel, field_validator
from urllib.parse import urlparse, urljoin, parse_qs


from .helpers import *
Expand Down Expand Up @@ -1646,6 +1646,27 @@ class RAW_DNS_RECORD(DictHostEvent, DnsEvent):
class MOBILE_APP(DictEvent):
    """A mobile application, identified by its app-store URL and app id.

    Accepts either a bare URL string or a dict with at least a "url" key.
    The app "id" is taken from the URL's ?id= query parameter when not
    supplied explicitly (e.g. a Google Play store link).
    """

    # Always emit this event type, regardless of normal dedupe/suppression rules.
    _always_emit = True

    def _sanitize_data(self, data):
        """Normalize *data* into a dict containing both "url" and "id".

        Args:
            data (str or dict): a URL string, or a dict with a "url" key and
                optionally an "id" key.

        Returns:
            dict: the sanitized data, guaranteed to contain "url" and "id".

        Raises:
            ValidationError: if "url" is missing, the URL cannot be parsed,
                or no app id can be determined from the URL.
        """
        if isinstance(data, str):
            data = {"url": data}
        if "url" not in data:
            raise ValidationError("url is required for MOBILE_APP events")
        url = data["url"]
        # parse URL
        try:
            self.parsed_url = urlparse(url)
        except Exception as e:
            # chain the original parse failure for easier debugging
            raise ValidationError(f"Error parsing URL {url}: {e}") from e
        if "id" not in data:
            # extract "id" getparam
            params = parse_qs(self.parsed_url.query)
            try:
                _id = params["id"][0]
            except Exception as e:
                raise ValidationError("id is required for MOBILE_APP events") from e
            data["id"] = _id
        return data

    def _pretty_string(self):
        # The store URL is the most human-readable representation of the app.
        return self.data["url"]

Expand Down
8 changes: 1 addition & 7 deletions bbot/core/helpers/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,13 +562,12 @@ def is_port(p):
return p and p.isdigit() and 0 <= int(p) <= 65535


def is_dns_name(d):
    """
    Determines if the given string is a valid DNS name.

    Args:
        d (str): The string to be checked.

    Returns:
        bool: True if the string is a valid DNS name, False otherwise.

    Examples:
        >>> is_dns_name('www.example.com')
        True
        >>> is_dns_name('localhost')
        True
        >>> is_dns_name('192.168.1.1')
        False
    """
    # An IP address is never a DNS name.
    if is_ip(d):
        return False
    decoded = smart_decode(d)
    # Valid only if the full string matches the DNS-name validation regex.
    return bool(bbot_regexes.dns_name_validation_regex.match(decoded))
Expand Down
15 changes: 3 additions & 12 deletions bbot/core/helpers/regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,10 @@
ip_range_regexes = [re.compile(r, re.I) for r in _ip_range_regexes]

# dns names with periods
_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"
_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.?)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"
dns_name_extraction_regex = re.compile(_dns_name_regex, re.I)
dns_name_validation_regex = re.compile(r"^" + _dns_name_regex + r"$", re.I)

# dns names without periods
_hostname_regex = r"(?!\w*\.\w+)\w(?:[\w-]{0,100}\w)?"
hostname_regex = re.compile(r"^" + _hostname_regex + r"$", re.I)

_email_regex = r"(?:[^\W_][\w\-\.\+']{,100})@" + _dns_name_regex
email_regex = re.compile(_email_regex, re.I)

Expand All @@ -61,14 +57,12 @@

_open_port_regexes = (
_dns_name_regex + r":[0-9]{1,5}",
_hostname_regex + r":[0-9]{1,5}",
r"\[" + _ipv6_regex + r"\]:[0-9]{1,5}",
)
open_port_regexes = [re.compile(r, re.I) for r in _open_port_regexes]

_url_regexes = (
r"https?://" + _dns_name_regex + r"(?::[0-9]{1,5})?(?:(?:/|\?).*)?",
r"https?://" + _hostname_regex + r"(?::[0-9]{1,5})?(?:(?:/|\?).*)?",
r"https?://\[" + _ipv6_regex + r"\](?::[0-9]{1,5})?(?:(?:/|\?).*)?",
)
url_regexes = [re.compile(r, re.I) for r in _url_regexes]
Expand All @@ -83,10 +77,7 @@
for k, regexes in (
(
"DNS_NAME",
(
r"^" + _dns_name_regex + r"$",
r"^" + _hostname_regex + r"$",
),
(r"^" + _dns_name_regex + r"$",),
),
(
"EMAIL_ADDRESS",
Expand Down Expand Up @@ -170,7 +161,7 @@
button_tag_regex2 = re.compile(
r"<button[^>]*?value=[\"\']?([\-%\._=+\/\w]*)[\"\']?[^>]*?name=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
)
tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?([^\s\'\"\>]+)[\"\']?[^>]*>")
tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?(?!mailto:)([^\s\'\"\>]+)[\"\']?[^>]*>")

valid_netloc = r"[^\s!@#$%^&()=/?\\'\";~`<>]+"

Expand Down
6 changes: 1 addition & 5 deletions bbot/modules/github_org.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,7 @@ async def validate_org(self, org):
for k, v in json.items():
if (
isinstance(v, str)
and (
self.helpers.is_dns_name(v, include_local=False)
or self.helpers.is_url(v)
or self.helpers.is_email(v)
)
and (self.helpers.is_dns_name(v) and "." in v or self.helpers.is_url(v) or self.helpers.is_email(v))
and self.scan.in_scope(v)
):
self.verbose(f'Found in-scope key "{k}": "{v}" for {org}, it appears to be in-scope')
Expand Down
6 changes: 4 additions & 2 deletions bbot/modules/internal/excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,9 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte
if self.excavate.in_bl(parameter_name) is False:
parsed_url = urlparse(url)
if not parsed_url.hostname:
self.excavate.warning(f"Error Parsing reconstructed URL [{url}] during parameter extraction, missing hostname")
self.excavate.warning(
f"Error Parsing reconstructed URL [{url}] during parameter extraction, missing hostname"
)
continue
description = f"HTTP Extracted Parameter [{parameter_name}] ({parameterExtractorSubModule.name} Submodule)"
data = {
Expand Down Expand Up @@ -814,7 +816,7 @@ class URLExtractor(ExcavateRule):
"""
),
}
full_url_regex = re.compile(r"(https?)://((?:\w|\d)(?:[\d\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]*[-\w\.]+)*/?)")
full_url_regex = re.compile(r"(https?)://(\w(?:[\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]*[-\w\.]+)*/?)")
full_url_regex_strict = re.compile(r"^(https?):\/\/([\w.-]+)(?::\d{1,5})?(\/[\w\/\.-]*)?(\?[^\s]+)?$")
tag_attribute_regex = bbot_regexes.tag_attribute_regex

Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/trufflehog.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class trufflehog(BaseModule):
}

options = {
"version": "3.87.0",
"version": "3.87.2",
"config": "",
"only_verified": True,
"concurrency": 8,
Expand Down
16 changes: 15 additions & 1 deletion bbot/scanner/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ def add(self, targets):
else:
event = self.make_event(target)
if event:
self.inputs.add(target)
_events = [event]
for event in _events:
self.inputs.add(event.data)
events.add(event)

# sort by host size to ensure consistency
Expand Down Expand Up @@ -140,6 +140,20 @@ def handle_username(self, match):
return [username_event]
return []

@special_target_type(r"^(?:FILESYSTEM|FILE|FOLDER|DIR|PATH):(.*)")
def handle_filesystem(self, match):
    # Build a FILESYSTEM event from the captured path; return it as a
    # single-element list, or an empty list if event creation failed.
    event = self.make_event({"path": match.group(1)}, event_type="FILESYSTEM")
    return [event] if event else []

@special_target_type(r"^(?:MOBILE_APP|APK|IPA|APP):(.*)")
def handle_mobile_app(self, match):
    # Build a MOBILE_APP event from the captured URL; return it as a
    # single-element list, or an empty list if event creation failed.
    event = self.make_event({"url": match.group(1)}, event_type="MOBILE_APP")
    return [event] if event else []

def get(self, event, single=True, **kwargs):
results = super().get(event, **kwargs)
if results and single:
Expand Down
39 changes: 39 additions & 0 deletions bbot/test/test_step_1/test_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,45 @@ def test_event_magic():
zip_file.unlink()


@pytest.mark.asyncio
async def test_mobile_app():
    scan = Scanner()
    # All of these lack a derivable app id (or a URL entirely) and must be rejected.
    invalid_inputs = (
        "com.evilcorp.app",
        {"id": "com.evilcorp.app"},
        {"url": "https://play.google.com/store/apps/details"},
    )
    for bad in invalid_inputs:
        with pytest.raises(ValidationError):
            scan.make_event(bad, "MOBILE_APP", parent=scan.root_event)

    good_url = "https://play.google.com/store/apps/details?id=com.evilcorp.app"
    expected_items = [("id", "com.evilcorp.app"), ("url", good_url)]
    # A valid store URL yields an event with both the url and the extracted id.
    mobile_app = scan.make_event({"url": good_url}, "MOBILE_APP", parent=scan.root_event)
    assert sorted(mobile_app.data.items()) == expected_items

    # The MOBILE_APP: special target type seeds a scan with the same event.
    scan = Scanner("MOBILE_APP:https://play.google.com/store/apps/details?id=com.evilcorp.app")
    events = [e async for e in scan.async_start()]
    assert len(events) == 3
    mobile_app_event = next(e for e in events if e.type == "MOBILE_APP")
    assert mobile_app_event.type == "MOBILE_APP"
    assert sorted(mobile_app_event.data.items()) == expected_items


@pytest.mark.asyncio
async def test_filesystem():
    # The FILESYSTEM: special target type seeds the scan with a path event.
    scan = Scanner("FILESYSTEM:/tmp/asdf")
    events = [e async for e in scan.async_start()]
    assert len(events) == 3
    fs_events = [e for e in events if e.type == "FILESYSTEM"]
    assert len(fs_events) == 1
    fs_event = fs_events[0]
    assert fs_event.type == "FILESYSTEM"
    assert fs_event.data == {"path": "/tmp/asdf"}


def test_event_hashing():
scan = Scanner("example.com")
url_event = scan.make_event("https://api.example.com/", "URL_UNVERIFIED", parent=scan.root_event)
Expand Down
2 changes: 1 addition & 1 deletion bbot/test/test_step_1/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_httpserver):
assert not helpers.is_dns_name("evilcorp.com:80")
assert not helpers.is_dns_name("http://evilcorp.com:80")
assert helpers.is_dns_name("evilcorp")
assert not helpers.is_dns_name("evilcorp", include_local=False)
assert helpers.is_dns_name("evilcorp.")
assert helpers.is_dns_name("ドメイン.テスト")
assert not helpers.is_dns_name("127.0.0.1")
assert not helpers.is_dns_name("dead::beef")
Expand Down
4 changes: 2 additions & 2 deletions bbot/test/test_step_1/test_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,13 @@ def test_preset_scope():
}
assert preset_whitelist_baked.to_dict(include_target=True) == {
"target": ["evilcorp.org"],
"whitelist": ["1.2.3.0/24", "http://evilcorp.net/"],
"whitelist": ["1.2.3.4/24", "http://evilcorp.net"],
"blacklist": ["[email protected]", "evilcorp.co.uk:443"],
"config": {"modules": {"secretsdb": {"api_key": "deadbeef", "otherthing": "asdf"}}},
}
assert preset_whitelist_baked.to_dict(include_target=True, redact_secrets=True) == {
"target": ["evilcorp.org"],
"whitelist": ["1.2.3.0/24", "http://evilcorp.net/"],
"whitelist": ["1.2.3.4/24", "http://evilcorp.net"],
"blacklist": ["[email protected]", "evilcorp.co.uk:443"],
"config": {"modules": {"secretsdb": {"otherthing": "asdf"}}},
}
Expand Down
2 changes: 1 addition & 1 deletion bbot/test/test_step_1/test_regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,6 @@ def test_url_regexes():
"http:///evilcorp.com",
"http:// evilcorp.com",
"http://evilcorp com",
"http://evilcorp.",
"http://.com",
"evilcorp.com",
"http://ex..ample.com",
Expand All @@ -288,6 +287,7 @@ def test_url_regexes():

good_urls = [
"https://evilcorp.com",
"http://evilcorp.",
"https://asdf.www.evilcorp.com",
"https://asdf.www-test.evilcorp.com",
"https://a.www-test.evilcorp.c",
Expand Down
32 changes: 27 additions & 5 deletions bbot/test/test_step_2/module_tests/test_module_excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ async def setup_before_prep(self, module_test):
# these ones should
<a href="/a_relative.txt">
<link href="/link_relative.txt">
<a href="mailto:[email protected]?subject=help">Help</a>
"""
expect_args = {"method": "GET", "uri": "/"}
respond_args = {"response_data": response_data}
Expand Down Expand Up @@ -1248,11 +1249,6 @@ def check(self, module_test, events):
), f"URL extracted from extractous text is incorrect, got {url_events}"


from bbot.modules.base import BaseModule
from .base import ModuleTestBase, tempwordlist
from bbot.modules.internal.excavate import ExcavateRule


class TestExcavate(ModuleTestBase):
targets = ["http://127.0.0.1:8888/", "test.notreal", "http://127.0.0.1:8888/subdir/links.html"]
modules_overrides = ["excavate", "httpx"]
Expand Down Expand Up @@ -1380,3 +1376,29 @@ def check(self, module_test, events):
assert found_first_cookie is True
assert found_second_cookie is False
assert found_third_cookie is False

class TestExcavateBadURLs(ModuleTestBase):
    """Regression test: excavate must skip non-http(s) and malformed URLs
    (mailto: links, truncated hostnames) without emitting parse/sanitize errors.
    """

    targets = ["http://127.0.0.1:8888/"]
    modules_overrides = ["excavate", "httpx", "hunt"]
    # report_distance widened so discovered URLs aren't dropped for scope reasons
    config_overrides = {"interactsh_disable": True, "scope": {"report_distance": 10}}

    # One mailto: link (should be ignored) and one URL with a trailing-dot host.
    bad_url_data = """
    <a href='mailto:[email protected]?subject=help'>Help</a>
    <a href='https://ssl.'>Help</a>
    """

    async def setup_after_prep(self, module_test):
        # Serve the problematic HTML from the scan target's root.
        module_test.set_expect_requests({"uri": "/"}, {"response_data": self.bad_url_data})

    def check(self, module_test, events):
        log_file = module_test.scan.home / "debug.log"
        log_text = log_file.read_text()
        # make sure our logging is working
        assert "Setting scan status to STARTING" in log_text
        # make sure we don't have any URL validation errors
        assert "Error Parsing reconstructed URL" not in log_text
        assert "Error sanitizing event data" not in log_text

        # Only the target URL and the (normalized) trailing-dot URL should surface.
        url_events = [e for e in events if e.type == "URL_UNVERIFIED"]
        assert sorted([e.data for e in url_events]) == sorted(["https://ssl/", "http://127.0.0.1:8888/"])

5 changes: 5 additions & 0 deletions docs/scanning/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ Targets declare what's in-scope, and seed a scan with initial data. BBOT accepts
- `IP_RANGE` (`1.2.3.0/24`)
- `OPEN_TCP_PORT` (`192.168.0.1:80`)
- `URL` (`https://www.evilcorp.com`)
- `EMAIL_ADDRESS` (`[email protected]`)
- `ORG_STUB` (`ORG:evilcorp`)
- `USER_STUB` (`USER:bobsmith`)
- `FILESYSTEM` (`FILESYSTEM:/tmp/asdf`)
- `MOBILE_APP` (`MOBILE_APP:https://play.google.com/store/apps/details?id=com.evilcorp.app`)

Note that BBOT only discriminates down to the host level. This means, for example, if you specify a URL `https://www.evilcorp.com` as the target, the scan will be *seeded* with that URL, but the scope of the scan will be the entire host, `www.evilcorp.com`. Other ports/URLs on that same host may also be scanned.

Expand Down
Loading

0 comments on commit 7487d7b

Please sign in to comment.