Skip to content

Commit

Permalink
Merge branch 'dev' into lightfuzz
Browse files Browse the repository at this point in the history
  • Loading branch information
liquidsec authored Dec 20, 2024
2 parents 8808fd3 + 798670d commit 7487d7b
Show file tree
Hide file tree
Showing 16 changed files with 137 additions and 47 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,11 @@ Targets can be any of the following:
- `IP_RANGE` (`1.2.3.0/24`)
- `OPEN_TCP_PORT` (`192.168.0.1:80`)
- `URL` (`https://www.evilcorp.com`)
- `EMAIL_ADDRESS` (`[email protected]`)
- `ORG_STUB` (`ORG:evilcorp`)
- `USER_STUB` (`USER:bobsmith`)
- `FILESYSTEM` (`FILESYSTEM:/tmp/asdf`)
- `MOBILE_APP` (`MOBILE_APP:https://play.google.com/store/apps/details?id=com.evilcorp.app`)

For more information, see [Targets](https://www.blacklanternsecurity.com/bbot/Stable/scanning/#targets-t). To learn how BBOT handles scope, see [Scope](https://www.blacklanternsecurity.com/bbot/Stable/scanning/#scope).

Expand Down
23 changes: 22 additions & 1 deletion bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from copy import copy, deepcopy
from contextlib import suppress
from radixtarget import RadixTarget
from urllib.parse import urljoin, parse_qs
from pydantic import BaseModel, field_validator
from urllib.parse import urlparse, urljoin, parse_qs


from .helpers import *
Expand Down Expand Up @@ -1646,6 +1646,27 @@ class RAW_DNS_RECORD(DictHostEvent, DnsEvent):
class MOBILE_APP(DictEvent):
    """A mobile application, identified by its app-store URL and app id.

    Accepts either a bare URL string or a dict with at least a "url" key.
    The app "id" is taken from the URL's ?id= query parameter when not
    supplied explicitly (e.g. a Google Play store link).
    """

    # Always emit this event type, regardless of normal dedupe/suppression rules.
    _always_emit = True

    def _sanitize_data(self, data):
        """Normalize *data* into a dict containing both "url" and "id".

        Args:
            data (str or dict): a URL string, or a dict with a "url" key and
                optionally an "id" key.

        Returns:
            dict: the sanitized data, guaranteed to contain "url" and "id".

        Raises:
            ValidationError: if "url" is missing, the URL cannot be parsed,
                or no app id can be determined from the URL.
        """
        if isinstance(data, str):
            data = {"url": data}
        if "url" not in data:
            raise ValidationError("url is required for MOBILE_APP events")
        url = data["url"]
        # parse URL
        try:
            self.parsed_url = urlparse(url)
        except Exception as e:
            # chain the original parse failure for easier debugging
            raise ValidationError(f"Error parsing URL {url}: {e}") from e
        if "id" not in data:
            # extract "id" getparam
            params = parse_qs(self.parsed_url.query)
            try:
                _id = params["id"][0]
            except Exception as e:
                raise ValidationError("id is required for MOBILE_APP events") from e
            data["id"] = _id
        return data

    def _pretty_string(self):
        # The store URL is the most human-readable representation of the app.
        return self.data["url"]

Expand Down
8 changes: 1 addition & 7 deletions bbot/core/helpers/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -562,13 +562,12 @@ def is_port(p):
return p and p.isdigit() and 0 <= int(p) <= 65535


def is_dns_name(d):
    """
    Determines if the given string is a valid DNS name.

    Args:
        d (str): The string to be checked.

    Returns:
        bool: True if the string is a valid DNS name, False otherwise.

    Examples:
        >>> is_dns_name('www.example.com')
        True
        >>> is_dns_name('localhost')
        True
        >>> is_dns_name('192.168.1.1')
        False
    """
    # An IP address is never a DNS name.
    if is_ip(d):
        return False
    decoded = smart_decode(d)
    # Valid only if the full string matches the DNS-name validation regex.
    return bool(bbot_regexes.dns_name_validation_regex.match(decoded))
Expand Down
15 changes: 3 additions & 12 deletions bbot/core/helpers/regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,10 @@
ip_range_regexes = [re.compile(r, re.I) for r in _ip_range_regexes]

# dns names with periods
_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"
_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.?)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"
dns_name_extraction_regex = re.compile(_dns_name_regex, re.I)
dns_name_validation_regex = re.compile(r"^" + _dns_name_regex + r"$", re.I)

# dns names without periods
_hostname_regex = r"(?!\w*\.\w+)\w(?:[\w-]{0,100}\w)?"
hostname_regex = re.compile(r"^" + _hostname_regex + r"$", re.I)

_email_regex = r"(?:[^\W_][\w\-\.\+']{,100})@" + _dns_name_regex
email_regex = re.compile(_email_regex, re.I)

Expand All @@ -61,14 +57,12 @@

_open_port_regexes = (
_dns_name_regex + r":[0-9]{1,5}",
_hostname_regex + r":[0-9]{1,5}",
r"\[" + _ipv6_regex + r"\]:[0-9]{1,5}",
)
open_port_regexes = [re.compile(r, re.I) for r in _open_port_regexes]

_url_regexes = (
r"https?://" + _dns_name_regex + r"(?::[0-9]{1,5})?(?:(?:/|\?).*)?",
r"https?://" + _hostname_regex + r"(?::[0-9]{1,5})?(?:(?:/|\?).*)?",
r"https?://\[" + _ipv6_regex + r"\](?::[0-9]{1,5})?(?:(?:/|\?).*)?",
)
url_regexes = [re.compile(r, re.I) for r in _url_regexes]
Expand All @@ -83,10 +77,7 @@
for k, regexes in (
(
"DNS_NAME",
(
r"^" + _dns_name_regex + r"$",
r"^" + _hostname_regex + r"$",
),
(r"^" + _dns_name_regex + r"$",),
),
(
"EMAIL_ADDRESS",
Expand Down Expand Up @@ -170,7 +161,7 @@
button_tag_regex2 = re.compile(
r"<button[^>]*?value=[\"\']?([\-%\._=+\/\w]*)[\"\']?[^>]*?name=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>"
)
tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?([^\s\'\"\>]+)[\"\']?[^>]*>")
tag_attribute_regex = re.compile(r"<[^>]*(?:href|action|src)\s*=\s*[\"\']?(?!mailto:)([^\s\'\"\>]+)[\"\']?[^>]*>")

valid_netloc = r"[^\s!@#$%^&()=/?\\'\";~`<>]+"

Expand Down
6 changes: 1 addition & 5 deletions bbot/modules/github_org.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,11 +206,7 @@ async def validate_org(self, org):
for k, v in json.items():
if (
isinstance(v, str)
and (
self.helpers.is_dns_name(v, include_local=False)
or self.helpers.is_url(v)
or self.helpers.is_email(v)
)
and (self.helpers.is_dns_name(v) and "." in v or self.helpers.is_url(v) or self.helpers.is_email(v))
and self.scan.in_scope(v)
):
self.verbose(f'Found in-scope key "{k}": "{v}" for {org}, it appears to be in-scope')
Expand Down
6 changes: 4 additions & 2 deletions bbot/modules/internal/excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,7 +613,9 @@ async def process(self, yara_results, event, yara_rule_settings, discovery_conte
if self.excavate.in_bl(parameter_name) is False:
parsed_url = urlparse(url)
if not parsed_url.hostname:
self.excavate.warning(f"Error Parsing reconstructed URL [{url}] during parameter extraction, missing hostname")
self.excavate.warning(
f"Error Parsing reconstructed URL [{url}] during parameter extraction, missing hostname"
)
continue
description = f"HTTP Extracted Parameter [{parameter_name}] ({parameterExtractorSubModule.name} Submodule)"
data = {
Expand Down Expand Up @@ -814,7 +816,7 @@ class URLExtractor(ExcavateRule):
"""
),
}
full_url_regex = re.compile(r"(https?)://((?:\w|\d)(?:[\d\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]*[-\w\.]+)*/?)")
full_url_regex = re.compile(r"(https?)://(\w(?:[\w-]+\.?)+(?::\d{1,5})?(?:/[-\w\.\(\)]*[-\w\.]+)*/?)")
full_url_regex_strict = re.compile(r"^(https?):\/\/([\w.-]+)(?::\d{1,5})?(\/[\w\/\.-]*)?(\?[^\s]+)?$")
tag_attribute_regex = bbot_regexes.tag_attribute_regex

Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/trufflehog.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class trufflehog(BaseModule):
}

options = {
"version": "3.87.0",
"version": "3.87.2",
"config": "",
"only_verified": True,
"concurrency": 8,
Expand Down
16 changes: 15 additions & 1 deletion bbot/scanner/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ def add(self, targets):
else:
event = self.make_event(target)
if event:
self.inputs.add(target)
_events = [event]
for event in _events:
self.inputs.add(event.data)
events.add(event)

# sort by host size to ensure consistency
Expand Down Expand Up @@ -140,6 +140,20 @@ def handle_username(self, match):
return [username_event]
return []

@special_target_type(r"^(?:FILESYSTEM|FILE|FOLDER|DIR|PATH):(.*)")
def handle_filesystem(self, match):
    # Build a FILESYSTEM event from the captured path; return it as a
    # single-element list, or an empty list if event creation failed.
    event = self.make_event({"path": match.group(1)}, event_type="FILESYSTEM")
    return [event] if event else []

@special_target_type(r"^(?:MOBILE_APP|APK|IPA|APP):(.*)")
def handle_mobile_app(self, match):
    # Build a MOBILE_APP event from the captured URL; return it as a
    # single-element list, or an empty list if event creation failed.
    event = self.make_event({"url": match.group(1)}, event_type="MOBILE_APP")
    return [event] if event else []

def get(self, event, single=True, **kwargs):
results = super().get(event, **kwargs)
if results and single:
Expand Down
39 changes: 39 additions & 0 deletions bbot/test/test_step_1/test_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,45 @@ def test_event_magic():
zip_file.unlink()


@pytest.mark.asyncio
async def test_mobile_app():
    scan = Scanner()
    # All of these lack a derivable app id (or a URL entirely) and must be rejected.
    invalid_inputs = (
        "com.evilcorp.app",
        {"id": "com.evilcorp.app"},
        {"url": "https://play.google.com/store/apps/details"},
    )
    for bad in invalid_inputs:
        with pytest.raises(ValidationError):
            scan.make_event(bad, "MOBILE_APP", parent=scan.root_event)

    good_url = "https://play.google.com/store/apps/details?id=com.evilcorp.app"
    expected_items = [("id", "com.evilcorp.app"), ("url", good_url)]
    # A valid store URL yields an event with both the url and the extracted id.
    mobile_app = scan.make_event({"url": good_url}, "MOBILE_APP", parent=scan.root_event)
    assert sorted(mobile_app.data.items()) == expected_items

    # The MOBILE_APP: special target type seeds a scan with the same event.
    scan = Scanner("MOBILE_APP:https://play.google.com/store/apps/details?id=com.evilcorp.app")
    events = [e async for e in scan.async_start()]
    assert len(events) == 3
    mobile_app_event = next(e for e in events if e.type == "MOBILE_APP")
    assert mobile_app_event.type == "MOBILE_APP"
    assert sorted(mobile_app_event.data.items()) == expected_items


@pytest.mark.asyncio
async def test_filesystem():
    # The FILESYSTEM: special target type seeds the scan with a path event.
    scan = Scanner("FILESYSTEM:/tmp/asdf")
    events = [e async for e in scan.async_start()]
    assert len(events) == 3
    fs_events = [e for e in events if e.type == "FILESYSTEM"]
    assert len(fs_events) == 1
    fs_event = fs_events[0]
    assert fs_event.type == "FILESYSTEM"
    assert fs_event.data == {"path": "/tmp/asdf"}


def test_event_hashing():
scan = Scanner("example.com")
url_event = scan.make_event("https://api.example.com/", "URL_UNVERIFIED", parent=scan.root_event)
Expand Down
2 changes: 1 addition & 1 deletion bbot/test/test_step_1/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ async def test_helpers_misc(helpers, scan, bbot_scanner, bbot_httpserver):
assert not helpers.is_dns_name("evilcorp.com:80")
assert not helpers.is_dns_name("http://evilcorp.com:80")
assert helpers.is_dns_name("evilcorp")
assert not helpers.is_dns_name("evilcorp", include_local=False)
assert helpers.is_dns_name("evilcorp.")
assert helpers.is_dns_name("ドメイン.テスト")
assert not helpers.is_dns_name("127.0.0.1")
assert not helpers.is_dns_name("dead::beef")
Expand Down
4 changes: 2 additions & 2 deletions bbot/test/test_step_1/test_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,13 @@ def test_preset_scope():
}
assert preset_whitelist_baked.to_dict(include_target=True) == {
"target": ["evilcorp.org"],
"whitelist": ["1.2.3.0/24", "http://evilcorp.net/"],
"whitelist": ["1.2.3.4/24", "http://evilcorp.net"],
"blacklist": ["[email protected]", "evilcorp.co.uk:443"],
"config": {"modules": {"secretsdb": {"api_key": "deadbeef", "otherthing": "asdf"}}},
}
assert preset_whitelist_baked.to_dict(include_target=True, redact_secrets=True) == {
"target": ["evilcorp.org"],
"whitelist": ["1.2.3.0/24", "http://evilcorp.net/"],
"whitelist": ["1.2.3.4/24", "http://evilcorp.net"],
"blacklist": ["[email protected]", "evilcorp.co.uk:443"],
"config": {"modules": {"secretsdb": {"otherthing": "asdf"}}},
}
Expand Down
2 changes: 1 addition & 1 deletion bbot/test/test_step_1/test_regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,6 @@ def test_url_regexes():
"http:///evilcorp.com",
"http:// evilcorp.com",
"http://evilcorp com",
"http://evilcorp.",
"http://.com",
"evilcorp.com",
"http://ex..ample.com",
Expand All @@ -288,6 +287,7 @@ def test_url_regexes():

good_urls = [
"https://evilcorp.com",
"http://evilcorp.",
"https://asdf.www.evilcorp.com",
"https://asdf.www-test.evilcorp.com",
"https://a.www-test.evilcorp.c",
Expand Down
32 changes: 27 additions & 5 deletions bbot/test/test_step_2/module_tests/test_module_excavate.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ async def setup_before_prep(self, module_test):
# these ones should
<a href="/a_relative.txt">
<link href="/link_relative.txt">
<a href="mailto:[email protected]?subject=help">Help</a>
"""
expect_args = {"method": "GET", "uri": "/"}
respond_args = {"response_data": response_data}
Expand Down Expand Up @@ -1248,11 +1249,6 @@ def check(self, module_test, events):
), f"URL extracted from extractous text is incorrect, got {url_events}"


from bbot.modules.base import BaseModule
from .base import ModuleTestBase, tempwordlist
from bbot.modules.internal.excavate import ExcavateRule


class TestExcavate(ModuleTestBase):
targets = ["http://127.0.0.1:8888/", "test.notreal", "http://127.0.0.1:8888/subdir/links.html"]
modules_overrides = ["excavate", "httpx"]
Expand Down Expand Up @@ -1380,3 +1376,29 @@ def check(self, module_test, events):
assert found_first_cookie is True
assert found_second_cookie is False
assert found_third_cookie is False

class TestExcavateBadURLs(ModuleTestBase):
    """Regression test: excavate must skip non-http(s) and malformed URLs
    (mailto: links, truncated hostnames) without emitting parse/sanitize errors.
    """

    targets = ["http://127.0.0.1:8888/"]
    modules_overrides = ["excavate", "httpx", "hunt"]
    # report_distance widened so discovered URLs aren't dropped for scope reasons
    config_overrides = {"interactsh_disable": True, "scope": {"report_distance": 10}}

    # One mailto: link (should be ignored) and one URL with a trailing-dot host.
    bad_url_data = """
    <a href='mailto:[email protected]?subject=help'>Help</a>
    <a href='https://ssl.'>Help</a>
    """

    async def setup_after_prep(self, module_test):
        # Serve the problematic HTML from the scan target's root.
        module_test.set_expect_requests({"uri": "/"}, {"response_data": self.bad_url_data})

    def check(self, module_test, events):
        log_file = module_test.scan.home / "debug.log"
        log_text = log_file.read_text()
        # make sure our logging is working
        assert "Setting scan status to STARTING" in log_text
        # make sure we don't have any URL validation errors
        assert "Error Parsing reconstructed URL" not in log_text
        assert "Error sanitizing event data" not in log_text

        # Only the target URL and the (normalized) trailing-dot URL should surface.
        url_events = [e for e in events if e.type == "URL_UNVERIFIED"]
        assert sorted([e.data for e in url_events]) == sorted(["https://ssl/", "http://127.0.0.1:8888/"])

5 changes: 5 additions & 0 deletions docs/scanning/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,11 @@ Targets declare what's in-scope, and seed a scan with initial data. BBOT accepts
- `IP_RANGE` (`1.2.3.0/24`)
- `OPEN_TCP_PORT` (`192.168.0.1:80`)
- `URL` (`https://www.evilcorp.com`)
- `EMAIL_ADDRESS` (`[email protected]`)
- `ORG_STUB` (`ORG:evilcorp`)
- `USER_STUB` (`USER:bobsmith`)
- `FILESYSTEM` (`FILESYSTEM:/tmp/asdf`)
- `MOBILE_APP` (`MOBILE_APP:https://play.google.com/store/apps/details?id=com.evilcorp.app`)

Note that BBOT only discriminates down to the host level. This means, for example, if you specify a URL `https://www.evilcorp.com` as the target, the scan will be *seeded* with that URL, but the scope of the scan will be the entire host, `www.evilcorp.com`. Other ports/URLs on that same host may also be scanned.

Expand Down
Loading

0 comments on commit 7487d7b

Please sign in to comment.