Skip to content

Commit

Permalink
Merge pull request #1899 from blacklanternsecurity/update-radixtarget
Browse files Browse the repository at this point in the history
Add blacklist regex feature
  • Loading branch information
TheTechromancer authored Nov 19, 2024
2 parents 1928939 + 3fc7ed4 commit 328a688
Show file tree
Hide file tree
Showing 60 changed files with 782 additions and 710 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
poetry install
- name: Run tests
run: |
poetry run pytest --exitfirst --reruns 2 -o timeout_func_only=true --timeout 1200 --disable-warnings --log-cli-level=INFO --cov-config=bbot/test/coverage.cfg --cov-report xml:cov.xml --cov=bbot .
poetry run pytest -vv --exitfirst --reruns 2 -o timeout_func_only=true --timeout 1200 --disable-warnings --log-cli-level=INFO --cov-config=bbot/test/coverage.cfg --cov-report xml:cov.xml --cov=bbot .
- name: Upload Debug Logs
uses: actions/upload-artifact@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion bbot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ async def _main():
if sys.stdin.isatty():

# warn if any targets belong directly to a cloud provider
for event in scan.target.events:
for event in scan.target.seeds.events:
if event.type == "DNS_NAME":
cloudcheck_result = scan.helpers.cloudcheck(event.host)
if cloudcheck_result:
Expand Down
4 changes: 2 additions & 2 deletions bbot/core/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ async def finished_tasks(self, tasks, timeout=None):
except BaseException as e:
if isinstance(e, (TimeoutError, asyncio.exceptions.TimeoutError)):
self.log.warning(f"{self.name}: Timeout after {timeout:,} seconds in finished_tasks({tasks})")
for task in tasks:
for task in list(tasks):
task.cancel()
self._await_cancelled_task(task)
else:
Expand Down Expand Up @@ -683,5 +683,5 @@ async def cancel_all_tasks(self):
for client_id in list(self.tasks):
await self.cancel_task(client_id)
for client_id, tasks in self.child_tasks.items():
for task in tasks:
for task in list(tasks):
await self._await_cancelled_task(task)
25 changes: 23 additions & 2 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,21 @@ def host_original(self):
return self.host
return self._host_original

@property
def host_filterable(self):
    """
    Return the string that regex-based blacklist entries are matched against.

    If the event exposes a non-None ``parsed_url``, the result is that URL as rebuilt by
    ``parsed_url.geturl()``. Otherwise it is ``str(self.host)``, or an empty string when
    the event has no host.

    For example, the user can specify "REGEX:.*.evilcorp.com" in their blacklist, and that
    regex will be applied against this property.
    """
    url = getattr(self, "parsed_url", None)
    if url is None:
        # No parsed URL on this event: fall back to the bare host, if there is one.
        return "" if self.host is None else str(self.host)
    return url.geturl()

@property
def port(self):
self.host
Expand Down Expand Up @@ -1114,8 +1129,7 @@ def __init__(self, *args, **kwargs):
class IP_RANGE(DnsEvent):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
net = ipaddress.ip_network(self.data, strict=False)
self.add_tag(f"ipv{net.version}")
self.add_tag(f"ipv{self.host.version}")

def sanitize_data(self, data):
return str(ipaddress.ip_network(str(data), strict=False))
Expand Down Expand Up @@ -1689,6 +1703,13 @@ def make_event(
if event_type == "USERNAME" and validators.soft_validate(data, "email"):
event_type = "EMAIL_ADDRESS"
tags.add("affiliate")
# Convert single-host IP_RANGE to IP_ADDRESS
if event_type == "IP_RANGE":
with suppress(Exception):
net = ipaddress.ip_network(data, strict=False)
if net.prefixlen == net.max_prefixlen:
event_type = "IP_ADDRESS"
data = net.network_address

event_class = globals().get(event_type, DefaultEvent)

Expand Down
9 changes: 8 additions & 1 deletion bbot/core/helpers/bloom.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,15 @@ def _fnv1a_hash(self, data):
hash = (hash * 0x01000193) % 2**32 # 16777619
return hash

def __del__(self):
def close(self):
"""Explicitly close the memory-mapped file."""
self.mmap_file.close()

def __del__(self):
try:
self.close()
except Exception:
pass

def __contains__(self, item):
return self.check(item)
4 changes: 2 additions & 2 deletions bbot/core/helpers/dns/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from bbot.core.helpers.regexes import dns_name_regex
from bbot.core.helpers.regexes import dns_name_extraction_regex
from bbot.core.helpers.misc import clean_dns_record, smart_decode

log = logging.getLogger("bbot.core.helpers.dns")
Expand Down Expand Up @@ -198,7 +198,7 @@ def add_result(rdtype, _record):
elif rdtype == "TXT":
for s in record.strings:
s = smart_decode(s)
for match in dns_name_regex.finditer(s):
for match in dns_name_extraction_regex.finditer(s):
start, end = match.span()
host = s[start:end]
add_result(rdtype, host)
Expand Down
7 changes: 4 additions & 3 deletions bbot/core/helpers/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
from .regex import RegexHelper
from .wordcloud import WordCloud
from .interactsh import Interactsh
from ...scanner.target import Target
from .depsinstaller import DepsInstaller
from .async_helpers import get_event_loop

from bbot.scanner.target import BaseTarget

log = logging.getLogger("bbot.core.helpers")


Expand Down Expand Up @@ -155,8 +156,8 @@ def clean_old_scans(self):
_filter = lambda x: x.is_dir() and self.regexes.scan_name_regex.match(x.name)
self.clean_old(self.scans_dir, keep=self.keep_old_scans, filter=_filter)

def make_target(self, *events, **kwargs):
return Target(*events, **kwargs)
def make_target(self, *targets, **kwargs):
return BaseTarget(*targets, scan=self.scan, **kwargs)

@property
def config(self):
Expand Down
16 changes: 11 additions & 5 deletions bbot/core/helpers/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,17 +586,18 @@ def is_dns_name(d, include_local=True):
if include_local:
if bbot_regexes.hostname_regex.match(d):
return True
if bbot_regexes.dns_name_regex.match(d):
if bbot_regexes.dns_name_validation_regex.match(d):
return True
return False


def is_ip(d, version=None):
def is_ip(d, version=None, include_network=False):
"""
Checks if the given string or object represents a valid IP address.
Args:
d (str or ipaddress.IPvXAddress): The IP address to check.
include_network (bool, optional): Whether to include network types (IPv4Network or IPv6Network). Defaults to False.
version (int, optional): The IP version to validate (4 or 6). Default is None.
Returns:
Expand All @@ -612,12 +613,17 @@ def is_ip(d, version=None):
>>> is_ip('evilcorp.com')
False
"""
ip = None
try:
ip = ipaddress.ip_address(d)
if version is None or ip.version == version:
return True
except Exception:
pass
if include_network:
try:
ip = ipaddress.ip_network(d, strict=False)
except Exception:
pass
if ip is not None and (version is None or ip.version == version):
return True
return False


Expand Down
3 changes: 2 additions & 1 deletion bbot/core/helpers/regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@

# dns names with periods
_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"
dns_name_regex = re.compile(_dns_name_regex, re.I)
dns_name_extraction_regex = re.compile(_dns_name_regex, re.I)
dns_name_validation_regex = re.compile(r"^" + _dns_name_regex + r"$", re.I)

# dns names without periods
_hostname_regex = r"(?!\w*\.\w+)\w(?:[\w-]{0,100}\w)?"
Expand Down
2 changes: 1 addition & 1 deletion bbot/core/helpers/web/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __init__(self, parent_helper):
self.ssl_verify = self.config.get("ssl_verify", False)
engine_debug = self.config.get("engine", {}).get("debug", False)
super().__init__(
server_kwargs={"config": self.config, "target": self.parent_helper.preset.target.radix_only},
server_kwargs={"config": self.config, "target": self.parent_helper.preset.target.minimal},
debug=engine_debug,
)

Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/anubisdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ async def abort_if(self, event):
return True, "DNS name is unresolved"
return await super().abort_if(event)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
json = r.json()
if json:
Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/baddns.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ async def handle_event(self, event):
context=f'{{module}}\'s "{r_dict["module"]}" module found {{event.type}}: {r_dict["description"]}',
)
else:
self.warning(f"Got unrecognized confidence level: {r['confidence']}")
self.warning(f"Got unrecognized confidence level: {r_dict['confidence']}")

found_domains = r_dict.get("found_domains", None)
if found_domains:
Expand Down
4 changes: 2 additions & 2 deletions bbot/modules/bevigil.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@ async def request_urls(self, query):
url = f"{self.base_url}/{self.helpers.quote(query)}/urls/"
return await self.api_request(url)

def parse_subdomains(self, r, query=None):
async def parse_subdomains(self, r, query=None):
results = set()
subdomains = r.json().get("subdomains")
if subdomains:
results.update(subdomains)
return results

def parse_urls(self, r, query=None):
async def parse_urls(self, r, query=None):
results = set()
urls = r.json().get("urls")
if urls:
Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/binaryedge.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ async def request_url(self, query):
url = f"{self.base_url}/query/domains/subdomain/{self.helpers.quote(query)}"
return await self.api_request(url)

def parse_results(self, r, query):
async def parse_results(self, r, query):
j = r.json()
return j.get("events", [])
5 changes: 2 additions & 3 deletions bbot/modules/bufferoverrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ async def request_url(self, query):
url = f"{self.commercial_base_url if self.commercial else self.base_url}?q=.{query}"
return await self.api_request(url)

def parse_results(self, r, query):
async def parse_results(self, r, query):
j = r.json()
subdomains_set = set()
if isinstance(j, dict):
Expand All @@ -44,5 +44,4 @@ def parse_results(self, r, query):
subdomain = parts[4].strip()
if subdomain and subdomain.endswith(f".{query}"):
subdomains_set.add(subdomain)
for subdomain in subdomains_set:
yield subdomain
return subdomains_set
4 changes: 2 additions & 2 deletions bbot/modules/builtwith.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ async def request_redirects(self, query):
url = f"{self.base_url}/redirect1/api.json?KEY={{api_key}}&LOOKUP={query}"
return await self.api_request(url)

def parse_domains(self, r, query):
async def parse_domains(self, r, query):
"""
This method returns a set of subdomains.
Each subdomain is an "FQDN" that was reported in the "Detailed Technology Profile" page on builtwith.com
Expand Down Expand Up @@ -92,7 +92,7 @@ def parse_domains(self, r, query):
self.verbose(f"No results for {query}: {error}")
return results_set

def parse_redirects(self, r, query):
async def parse_redirects(self, r, query):
"""
This method creates a set.
Each entry in the set is either an Inbound or Outbound Redirect reported in the "Redirect Profile" page on builtwith.com
Expand Down
6 changes: 4 additions & 2 deletions bbot/modules/c99.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ async def request_url(self, query):
url = f"{self.base_url}/subdomainfinder?key={{api_key}}&domain={self.helpers.quote(query)}&json"
return await self.api_request(url)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
j = r.json()
if isinstance(j, dict):
subdomains = j.get("subdomains", [])
if subdomains:
for s in subdomains:
subdomain = s.get("subdomain", "")
if subdomain:
yield subdomain
results.add(subdomain)
return results
6 changes: 4 additions & 2 deletions bbot/modules/certspotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ def request_url(self, query):
url = f"{self.base_url}/issuances?domain={self.helpers.quote(query)}&include_subdomains=true&expand=dns_names"
return self.api_request(url, timeout=self.http_timeout + 30)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
json = r.json()
if json:
for r in json:
for dns_name in r.get("dns_names", []):
yield dns_name.lstrip(".*").rstrip(".")
results.add(dns_name.lstrip(".*").rstrip("."))
return results
6 changes: 4 additions & 2 deletions bbot/modules/chaos.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ async def request_url(self, query):
url = f"{self.base_url}/{domain}/subdomains"
return await self.api_request(url)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
j = r.json()
subdomains_set = set()
if isinstance(j, dict):
Expand All @@ -39,4 +40,5 @@ def parse_results(self, r, query):
for s in subdomains_set:
full_subdomain = f"{s}.{domain}"
if full_subdomain and full_subdomain.endswith(f".{query}"):
yield full_subdomain
results.add(full_subdomain)
return results
2 changes: 1 addition & 1 deletion bbot/modules/columbus.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ async def request_url(self, query):
url = f"{self.base_url}/{self.helpers.quote(query)}?days=365"
return await self.api_request(url)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
json = r.json()
if json and isinstance(json, list):
Expand Down
6 changes: 4 additions & 2 deletions bbot/modules/crt.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ async def request_url(self, query):
url = self.helpers.add_get_params(self.base_url, params).geturl()
return await self.api_request(url, timeout=self.http_timeout + 30)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
j = r.json()
for cert_info in j:
if not type(cert_info) == dict:
Expand All @@ -35,4 +36,5 @@ def parse_results(self, r, query):
domain = cert_info.get("name_value")
if domain:
for d in domain.splitlines():
yield d.lower()
results.add(d.lower())
return results
2 changes: 1 addition & 1 deletion bbot/modules/digitorus.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ async def request_url(self, query):
url = f"{self.base_url}/{self.helpers.quote(query)}"
return await self.helpers.request(url)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
content = getattr(r, "text", "")
extract_regex = re.compile(r"[\w.-]+\." + query, re.I)
Expand Down
6 changes: 3 additions & 3 deletions bbot/modules/dnscaa.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# Checks for and parses CAA DNS TXT records for IODEF reporting destination email addresses and/or URLs.
#
# NOTE: when the target domain is initially resolved basic "dns_name_regex" matched targets will be extracted so we do not perform that again here.
# NOTE: when the target domain is initially resolved, targets matching the basic "dns_name_extraction_regex" are already extracted, so we do not perform that again here.
#
# Example CAA records,
# 0 iodef "mailto:[email protected]"
Expand All @@ -23,7 +23,7 @@

import re

from bbot.core.helpers.regexes import dns_name_regex, email_regex, url_regexes
from bbot.core.helpers.regexes import dns_name_extraction_regex, email_regex, url_regexes

# Handle '0 iodef "mailto:[email protected]"'
# Handle '1 iodef "https://some.host.tld/caa;"'
Expand Down Expand Up @@ -109,7 +109,7 @@ async def handle_event(self, event):

elif caa_match.group("property").lower().startswith("issue"):
if self._dns_names:
for match in dns_name_regex.finditer(caa_match.group("text")):
for match in dns_name_extraction_regex.finditer(caa_match.group("text")):
start, end = match.span()
name = caa_match.group("text")[start:end]

Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/fullhunt.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ async def request_url(self, query):
response = await self.api_request(url)
return response

def parse_results(self, r, query):
async def parse_results(self, r, query):
return r.json().get("hosts", [])
Loading

0 comments on commit 328a688

Please sign in to comment.