documentation, tests for blacklisting by regex
github-actions committed Nov 6, 2024
1 parent 441445b commit dd60cea
Showing 8 changed files with 141 additions and 52 deletions.
4 changes: 2 additions & 2 deletions bbot/modules/bevigil.py
@@ -60,14 +60,14 @@ async def request_urls(self, query):
url = f"{self.base_url}/{self.helpers.quote(query)}/urls/"
return await self.api_request(url)

def parse_subdomains(self, r, query=None):
async def parse_subdomains(self, r, query=None):
results = set()
subdomains = r.json().get("subdomains")
if subdomains:
results.update(subdomains)
return results

def parse_urls(self, r, query=None):
async def parse_urls(self, r, query=None):
results = set()
urls = r.json().get("urls")
if urls:
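With `parse_subdomains` and `parse_urls` now declared `async`, callers have to await them. A minimal standalone sketch of the new call pattern, using a hypothetical `FakeResponse` in place of a real HTTP response object:

```python
import asyncio


class FakeResponse:
    """Hypothetical stand-in for the HTTP response object the module receives."""

    def json(self):
        return {"subdomains": ["www.evilcorp.com", "mail.evilcorp.com"]}


async def parse_subdomains(r, query=None):
    # mirrors the new coroutine signature: callers now have to await it
    results = set()
    subdomains = r.json().get("subdomains")
    if subdomains:
        results.update(subdomains)
    return results


print(asyncio.run(parse_subdomains(FakeResponse())))
# {'www.evilcorp.com', 'mail.evilcorp.com'}
```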
4 changes: 3 additions & 1 deletion bbot/scanner/preset/path.py
@@ -33,7 +33,9 @@ def find(self, filename):
if "/" in str(filename):
if filename_path.parent not in paths_to_search:
paths_to_search.append(filename_path.parent)
log.debug(f"Searching for preset in {paths_to_search}, file candidates: {file_candidates_str}")
log.debug(
f"Searching for preset in {[str(p) for p in paths_to_search]}, file candidates: {file_candidates_str}"
)
for path in paths_to_search:
for candidate in file_candidates:
for file in path.rglob(candidate):
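The reworked log line converts each `Path` to a string before formatting, which keeps the debug output readable. A quick illustration of the difference, using hypothetical preset paths:

```python
from pathlib import Path

paths_to_search = [Path("/home/user/.config/bbot/presets"), Path("/usr/share/bbot/presets")]

# Old formatting: Path reprs leak into the log message (on POSIX systems)
print(f"Searching for preset in {paths_to_search}")
# Searching for preset in [PosixPath('/home/user/.config/bbot/presets'), PosixPath('/usr/share/bbot/presets')]

# New formatting: plain strings
print(f"Searching for preset in {[str(p) for p in paths_to_search]}")
# Searching for preset in ['/home/user/.config/bbot/presets', '/usr/share/bbot/presets']
```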
2 changes: 1 addition & 1 deletion bbot/scanner/preset/preset.py
@@ -245,7 +245,7 @@ def __init__(
# "presets" is alias to "include"
if presets and include:
raise ValueError(
'Cannot use both "presets" and "include" args at the same time (presets is only an alias to include). Please pick only one :)'
'Cannot use both "presets" and "include" args at the same time (presets is an alias to include). Please pick one or the other :)'
)
if presets and not include:
include = presets
79 changes: 39 additions & 40 deletions bbot/scanner/target.py
@@ -42,23 +42,16 @@ class BaseTarget(RadixTarget):
def __init__(self, *targets, scan=None, **kwargs):
self.scan = scan
self.events = set()
super().__init__(**kwargs)
# we preserve the raw inputs to ensure we don't lose any information
self.inputs, events = self._make_events(targets)
# sort by host size to ensure consistency
events = sorted(events, key=lambda e: (0 if not e.host else host_size_key(e.host)))
for event in events:
if event.host:
self._add(event.host, data=event)
else:
self.events.add(event)
self.inputs = set()
# Register decorated methods
for method in dir(self):
if callable(getattr(self, method)):
if callable(getattr(self, method, None)):
func = getattr(self, method)
if hasattr(func, "_regex"):
self.special_target_types[func._regex] = func

super().__init__(*targets, **kwargs)

def get(self, event, single=True, **kwargs):
"""
Override default .get() to accept events and optionally return multiple results
@@ -92,42 +85,42 @@ def make_event(self, *args, **kwargs):
kwargs["tags"].update(self.tags)
return make_event(*args, dummy=True, scan=self.scan, **kwargs)

def _add(self, host, data=None):
"""
Overrides the base method to enable having multiple events for the same host.
The "data" attribute of the node is now a set of events.
"""
if data is None:
event = self.make_event(host)
else:
event = data
self.events.add(event)
if event.host:
try:
event_set = self.get(event.host, single=False, raise_error=True)
event_set.add(event)
except KeyError:
event_set = {event}
super()._add(event.host, data=event_set)
return event

def _make_events(self, targets):
inputs = set()
def add(self, targets):
if not isinstance(targets, (list, set, tuple)):
targets = [targets]
events = set()
for target in targets:
_events = []
special_target_type, _events = self.check_special_target_types(str(target))
if special_target_type:
inputs.add(str(target))
self.inputs.add(str(target))
else:
event = self.make_event(target)
if event:
_events = [event]
for event in _events:
inputs.add(event.data)
self.inputs.add(event.data)
events.add(event)
return inputs, events

# sort by host size to ensure consistency
events = sorted(events, key=lambda e: (0 if not e.host else host_size_key(e.host)))
for event in events:
self._add(event.host, data=event)

def _add(self, host, data):
"""
Overrides the base method to enable having multiple events for the same host.
The "data" attribute of the node is now a set of events.
"""
self.events.add(data)
if host:
try:
event_set = self.get(host, single=False, raise_error=True)
event_set.add(data)
except KeyError:
event_set = {data}
super()._add(host, data=event_set)

def check_special_target_types(self, target):
for regex, callback in self.special_target_types.items():
@@ -205,14 +198,20 @@ def get(self, event, **kwargs):
"""
event = self.make_event(event)
# first, check event's host against blacklist
event_result = super().get(event, **kwargs)
try:
event_result = super().get(event, raise_error=True)
except KeyError:
event_result = None
if event_result is not None:
return event_result
# next, check event's host against regexes
host_or_url = event.host_filterable
for regex in self.blacklist_regexes:
if regex.match(host_or_url):
return event
if host_or_url:
for regex in self.blacklist_regexes:
if regex.search(str(host_or_url)):
return event
if kwargs.get("raise_error", False):
raise KeyError(f"Host not found: '{event.data}'")
return None

def _hash_value(self):
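Taken together, the `ScanBlacklist.get()` changes mean a lookup can now succeed either through the usual host-based match or through a regex match against the event's host/URL (note the switch from `regex.match` to `regex.search`). A short usage sketch, mirroring the behavior exercised by the new test below:

```python
from bbot.scanner.target import ScanBlacklist

# regex entries use the "RE:" prefix; plain entries behave like normal hosts
blacklist = ScanBlacklist("evilcorp.com", r"RE:[0-9]{6}\.aspx$")

assert "www.evilcorp.com" in blacklist                       # host-based match
assert "http://test.com/asdf/123456.aspx" in blacklist       # regex matches the URL
assert "http://test.com/12345.aspx?a=asdf" not in blacklist  # only five digits, no match
```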
8 changes: 0 additions & 8 deletions bbot/test/bbot_fixtures.py
@@ -222,11 +222,3 @@ class bbot_events:
e.scope_distance = 0

return bbot_events


@pytest.fixture(scope="session", autouse=True)
def install_all_python_deps():
deps_pip = set()
for module in DEFAULT_PRESET.module_loader.preloaded().values():
deps_pip.update(set(module.get("deps", {}).get("pip", [])))
subprocess.run([sys.executable, "-m", "pip", "install"] + list(deps_pip))
8 changes: 8 additions & 0 deletions bbot/test/conftest.py
@@ -316,6 +316,14 @@ def pytest_terminal_summary(terminalreporter, exitstatus, config):  # pragma: no
# traceback.print_exc()


@pytest.fixture(scope="session", autouse=True)
def install_all_python_deps():
deps_pip = set()
for module in DEFAULT_PRESET.module_loader.preloaded().values():
deps_pip.update(set(module.get("deps", {}).get("pip", [])))
subprocess.run([sys.executable, "-m", "pip", "install"] + list(deps_pip))


@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_sessionfinish(session, exitstatus):
# Remove handlers from all loggers to prevent logging errors at exit
63 changes: 63 additions & 0 deletions bbot/test/test_step_1/test_target.py
@@ -333,3 +333,66 @@ async def test_target(bbot_scanner):
events = target.get("www.evilcorp.com", single=False)
assert len(events) == 2
assert set([e.data for e in events]) == {"http://evilcorp.com/", "evilcorp.com:443"}


@pytest.mark.asyncio
async def test_blacklist_regex(bbot_scanner, bbot_httpserver):

from bbot.scanner.target import ScanBlacklist

blacklist = ScanBlacklist("evilcorp.com")
assert blacklist.inputs == {"evilcorp.com"}
assert "www.evilcorp.com" in blacklist
assert "http://www.evilcorp.com" in blacklist
blacklist.add("RE:test")
assert "RE:test" in blacklist.inputs
assert set(blacklist.inputs) == {"evilcorp.com", "RE:test"}
assert blacklist.blacklist_regexes
assert next(iter(blacklist.blacklist_regexes)).pattern == "test"
result1 = blacklist.get("test.com")
assert result1.type == "DNS_NAME"
assert result1.data == "test.com"
result2 = blacklist.get("www.evilcorp.com")
assert result2.type == "DNS_NAME"
assert result2.data == "evilcorp.com"
result2 = blacklist.get("www.evil.com")
assert result2 is None
with pytest.raises(KeyError):
blacklist.get("www.evil.com", raise_error=True)
assert "test.com" in blacklist
assert "http://evilcorp.com/test.aspx" in blacklist
assert not "http://tes.com" in blacklist

blacklist = ScanBlacklist("evilcorp.com", r"RE:[0-9]{6}\.aspx$")
assert "http://evilcorp.com" in blacklist
assert not "http://test.com/123456" in blacklist
assert not "http://test.com/12345.aspx?a=asdf" in blacklist
assert not "http://test.com/asdf/123456.aspx/asdf" in blacklist
assert "http://test.com/asdf/123456.aspx?a=asdf" in blacklist
assert "http://test.com/asdf/123456.aspx" in blacklist

bbot_httpserver.expect_request(uri="/").respond_with_data("<a href='http://127.0.0.1:8888/asdfevil333asdf'/>")
bbot_httpserver.expect_request(uri="/asdfevilasdf").respond_with_data("")

# make sure URL is detected normally
scan = bbot_scanner("http://127.0.0.1:8888/", presets=["spider"], config={"excavate": True}, debug=True)
events = [e async for e in scan.async_start()]
urls = [e.data for e in events if e.type == "URL"]
assert len(urls) == 2
assert set(urls) == {"http://127.0.0.1:8888/", "http://127.0.0.1:8888/asdfevil333asdf"}

# same scan again but with blacklist regex
scan = bbot_scanner(
"http://127.0.0.1:8888/",
blacklist=[r"RE:evil[0-9]{3}"],
presets=["spider"],
config={"excavate": True},
debug=True,
)
print(scan.target.blacklist.blacklist_regexes)
assert scan.target.blacklist.blacklist_regexes
assert next(iter(scan.target.blacklist.blacklist_regexes)).pattern == "evil[0-9]{3}"
events = [e async for e in scan.async_start()]
urls = [e.data for e in events if e.type == "URL"]
assert len(urls) == 1
assert set(urls) == {"http://127.0.0.1:8888/"}
25 changes: 25 additions & 0 deletions docs/scanning/index.md
@@ -178,20 +178,45 @@ Note that `--strict-scope` only applies to targets and whitelists, but not black

BBOT allows precise control over scope with whitelists and blacklists. These both use the same syntax as `--target`, meaning they accept the same event types, and you can specify an unlimited number of them, via a file, the CLI, or both.

#### Whitelists

`--whitelist` enables you to override what's in scope. For example, if you want to run nuclei against `evilcorp.com`, but stay only inside their corporate IP range of `1.2.3.0/24`, you can accomplish this like so:

```bash
# Seed scan with evilcorp.com, but restrict scope to 1.2.3.0/24
bbot -t evilcorp.com --whitelist 1.2.3.0/24 -f subdomain-enum -m nmap nuclei --allow-deadly
```

#### Blacklists

`--blacklist` takes ultimate precedence. Anything in the blacklist is completely excluded from the scan, even if it's in the whitelist.

```bash
# Scan evilcorp.com, but exclude internal.evilcorp.com and its children
bbot -t evilcorp.com --blacklist internal.evilcorp.com -f subdomain-enum -m nmap nuclei --allow-deadly
```

#### Blacklist by Regex

Blacklists also accept regex patterns. These regexes are checked against the event's host, or in the case of URLs, against the full URL including the host and path.

To specify a regex, prefix the pattern with `RE:`. For example, to exclude all events containing "signout", you could do:

```bash
bbot -t evilcorp.com --blacklist "RE:signout"
```

Note that this would blacklist both of the following events:

- `[URL] http://evilcorp.com/signout.aspx`
- `[DNS_NAME] signout.evilcorp.com`

If you only want to blacklist the URL, you could narrow the regex like so:

```bash
bbot -t evilcorp.com --blacklist 'RE:signout\.aspx$'
```
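The same regex blacklisting is available from the Python API; a minimal sketch, assuming the standard `Scanner` entry point and the `blacklist` keyword used in the test suite:

```python
import asyncio

from bbot.scanner import Scanner


async def main():
    # regex blacklist entries use the same "RE:" prefix as on the CLI
    scan = Scanner("evilcorp.com", blacklist=[r"RE:signout\.aspx$"])
    async for event in scan.async_start():
        print(event)


asyncio.run(main())
```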

## DNS Wildcards

BBOT has robust wildcard detection built-in. It can reliably detect wildcard domains, and will tag them accordingly:
