Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add blacklist regex feature #1899

Merged
merged 29 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
ba4224a
resolve conflict
invalid-email-address Nov 4, 2024
2cd0e4e
add lock
invalid-email-address Nov 4, 2024
a267b6c
steady work
invalid-email-address Oct 30, 2024
8055276
update tags
invalid-email-address Oct 31, 2024
5238929
fix
invalid-email-address Oct 31, 2024
dffe93c
preset tests
invalid-email-address Oct 31, 2024
9bbf31e
more tests
invalid-email-address Nov 1, 2024
70fda2a
bugfixing
invalid-email-address Nov 1, 2024
df9cd27
radixtarget overhaul
invalid-email-address Nov 1, 2024
4d19fe5
add poetry.lock
invalid-email-address Nov 4, 2024
d5da47a
sort arg choices
invalid-email-address Nov 4, 2024
ccb6233
fix dns regex
invalid-email-address Nov 4, 2024
1475df9
fix dastardly tests
invalid-email-address Nov 4, 2024
092a68d
fix host error
invalid-email-address Nov 5, 2024
643269d
fix CSP extractor
invalid-email-address Nov 5, 2024
25d770a
fix tests
invalid-email-address Nov 6, 2024
2e35449
lint
invalid-email-address Nov 6, 2024
fa628fe
documentation, tests for blacklisting by regex
invalid-email-address Nov 6, 2024
99518a1
things
invalid-email-address Nov 6, 2024
bd1cc4d
add log message
invalid-email-address Nov 6, 2024
034cb93
more tests
invalid-email-address Nov 6, 2024
d2797cc
blacked
invalid-email-address Nov 6, 2024
1f3ea4e
fix conflict
invalid-email-address Nov 18, 2024
0d56dcf
add poetry.lock
invalid-email-address Nov 18, 2024
7152663
update docs
invalid-email-address Nov 7, 2024
af6d334
blacked
invalid-email-address Nov 7, 2024
9cd2aa4
fix tests
invalid-email-address Nov 7, 2024
efb2ff1
more tests
invalid-email-address Nov 7, 2024
3fc7ed4
fix bugs, thanks @Sh4d0wHunt3rX :)
invalid-email-address Nov 7, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
poetry install
- name: Run tests
run: |
poetry run pytest --exitfirst --reruns 2 -o timeout_func_only=true --timeout 1200 --disable-warnings --log-cli-level=INFO --cov-config=bbot/test/coverage.cfg --cov-report xml:cov.xml --cov=bbot .
poetry run pytest -vv --exitfirst --reruns 2 -o timeout_func_only=true --timeout 1200 --disable-warnings --log-cli-level=INFO --cov-config=bbot/test/coverage.cfg --cov-report xml:cov.xml --cov=bbot .
- name: Upload Debug Logs
uses: actions/upload-artifact@v3
with:
Expand Down
2 changes: 1 addition & 1 deletion bbot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ async def _main():
if sys.stdin.isatty():

# warn if any targets belong directly to a cloud provider
for event in scan.target.events:
for event in scan.target.seeds.events:
if event.type == "DNS_NAME":
cloudcheck_result = scan.helpers.cloudcheck(event.host)
if cloudcheck_result:
Expand Down
4 changes: 2 additions & 2 deletions bbot/core/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ async def finished_tasks(self, tasks, timeout=None):
except BaseException as e:
if isinstance(e, (TimeoutError, asyncio.exceptions.TimeoutError)):
self.log.warning(f"{self.name}: Timeout after {timeout:,} seconds in finished_tasks({tasks})")
for task in tasks:
for task in list(tasks):
task.cancel()
self._await_cancelled_task(task)
else:
Expand Down Expand Up @@ -683,5 +683,5 @@ async def cancel_all_tasks(self):
for client_id in list(self.tasks):
await self.cancel_task(client_id)
for client_id, tasks in self.child_tasks.items():
for task in tasks:
for task in list(tasks):
await self._await_cancelled_task(task)
25 changes: 23 additions & 2 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,21 @@ def host_original(self):
return self.host
return self._host_original

@property
def host_filterable(self):
    """
    A string version of the event that's used for regex-based blacklisting.

    For example, the user can specify "REGEX:.*.evilcorp.com" in their blacklist, and this regex
    will be applied against this property. Prefers the full URL when the event
    has one, then the host, and finally falls back to an empty string.
    """
    url = getattr(self, "parsed_url", None)
    if url is not None:
        return url.geturl()
    host = self.host
    return str(host) if host is not None else ""

@property
def port(self):
self.host
Expand Down Expand Up @@ -1114,8 +1129,7 @@ def __init__(self, *args, **kwargs):
class IP_RANGE(DnsEvent):
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # Tag the event with the IP version ("ipv4"/"ipv6") of the parsed network host.
    version = self.host.version
    self.add_tag(f"ipv{version}")

def sanitize_data(self, data):
    """Normalize the raw value into canonical CIDR string form (non-strict parse)."""
    network = ipaddress.ip_network(str(data), strict=False)
    return str(network)
Expand Down Expand Up @@ -1689,6 +1703,13 @@ def make_event(
if event_type == "USERNAME" and validators.soft_validate(data, "email"):
event_type = "EMAIL_ADDRESS"
tags.add("affiliate")
# Convert single-host IP_RANGE to IP_ADDRESS
if event_type == "IP_RANGE":
with suppress(Exception):
net = ipaddress.ip_network(data, strict=False)
if net.prefixlen == net.max_prefixlen:
event_type = "IP_ADDRESS"
data = net.network_address

event_class = globals().get(event_type, DefaultEvent)

Expand Down
9 changes: 8 additions & 1 deletion bbot/core/helpers/bloom.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,15 @@ def _fnv1a_hash(self, data):
hash = (hash * 0x01000193) % 2**32 # 16777619
return hash

def close(self):
    """Explicitly close the memory-mapped file backing this filter."""
    self.mmap_file.close()

def __del__(self):
    # Best-effort cleanup at garbage collection; errors are deliberately
    # swallowed because close() may fail during interpreter shutdown when
    # attributes/modules are already torn down.
    try:
        self.close()
    except Exception:
        pass

def __contains__(self, item):
return self.check(item)
4 changes: 2 additions & 2 deletions bbot/core/helpers/dns/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from bbot.core.helpers.regexes import dns_name_regex
from bbot.core.helpers.regexes import dns_name_extraction_regex
from bbot.core.helpers.misc import clean_dns_record, smart_decode

log = logging.getLogger("bbot.core.helpers.dns")
Expand Down Expand Up @@ -198,7 +198,7 @@ def add_result(rdtype, _record):
elif rdtype == "TXT":
for s in record.strings:
s = smart_decode(s)
for match in dns_name_regex.finditer(s):
for match in dns_name_extraction_regex.finditer(s):
start, end = match.span()
host = s[start:end]
add_result(rdtype, host)
Expand Down
7 changes: 4 additions & 3 deletions bbot/core/helpers/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
from .regex import RegexHelper
from .wordcloud import WordCloud
from .interactsh import Interactsh
from ...scanner.target import Target
from .depsinstaller import DepsInstaller
from .async_helpers import get_event_loop

from bbot.scanner.target import BaseTarget

log = logging.getLogger("bbot.core.helpers")


Expand Down Expand Up @@ -155,8 +156,8 @@ def clean_old_scans(self):
_filter = lambda x: x.is_dir() and self.regexes.scan_name_regex.match(x.name)
self.clean_old(self.scans_dir, keep=self.keep_old_scans, filter=_filter)

def make_target(self, *targets, **kwargs):
    """Construct a BaseTarget from the given targets, bound to this helper's scan."""
    return BaseTarget(*targets, scan=self.scan, **kwargs)

@property
def config(self):
Expand Down
16 changes: 11 additions & 5 deletions bbot/core/helpers/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,17 +586,18 @@ def is_dns_name(d, include_local=True):
if include_local:
if bbot_regexes.hostname_regex.match(d):
return True
if bbot_regexes.dns_name_regex.match(d):
if bbot_regexes.dns_name_validation_regex.match(d):
return True
return False


def is_ip(d, version=None):
def is_ip(d, version=None, include_network=False):
"""
Checks if the given string or object represents a valid IP address.

Args:
d (str or ipaddress.IPvXAddress): The IP address to check.
include_network (bool, optional): Whether to include network types (IPv4Network or IPv6Network). Defaults to False.
version (int, optional): The IP version to validate (4 or 6). Default is None.

Returns:
Expand All @@ -612,12 +613,17 @@ def is_ip(d, version=None):
>>> is_ip('evilcorp.com')
False
"""
ip = None
try:
ip = ipaddress.ip_address(d)
if version is None or ip.version == version:
return True
except Exception:
pass
if include_network:
try:
ip = ipaddress.ip_network(d, strict=False)
except Exception:
pass
if ip is not None and (version is None or ip.version == version):
return True
return False


Expand Down
3 changes: 2 additions & 1 deletion bbot/core/helpers/regexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@

# dns names with periods
# Raw (unanchored) pattern for a dotted DNS name, including optional punycode ("xn--") TLDs.
_dns_name_regex = r"(?:\w(?:[\w-]{0,100}\w)?\.)+(?:[xX][nN]--)?[^\W_]{1,63}\.?"
dns_name_regex = re.compile(_dns_name_regex, re.I)
# Unanchored compile: for extracting DNS names embedded anywhere in a larger string.
dns_name_extraction_regex = re.compile(_dns_name_regex, re.I)
# Anchored compile (^...$): for validating that an entire string is exactly one DNS name.
dns_name_validation_regex = re.compile(r"^" + _dns_name_regex + r"$", re.I)

# dns names without periods
_hostname_regex = r"(?!\w*\.\w+)\w(?:[\w-]{0,100}\w)?"
Expand Down
2 changes: 1 addition & 1 deletion bbot/core/helpers/web/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def __init__(self, parent_helper):
self.ssl_verify = self.config.get("ssl_verify", False)
engine_debug = self.config.get("engine", {}).get("debug", False)
super().__init__(
server_kwargs={"config": self.config, "target": self.parent_helper.preset.target.radix_only},
server_kwargs={"config": self.config, "target": self.parent_helper.preset.target.minimal},
debug=engine_debug,
)

Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/anubisdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ async def abort_if(self, event):
return True, "DNS name is unresolved"
return await super().abort_if(event)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
json = r.json()
if json:
Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/baddns.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ async def handle_event(self, event):
context=f'{{module}}\'s "{r_dict["module"]}" module found {{event.type}}: {r_dict["description"]}',
)
else:
self.warning(f"Got unrecognized confidence level: {r['confidence']}")
self.warning(f"Got unrecognized confidence level: {r_dict['confidence']}")

found_domains = r_dict.get("found_domains", None)
if found_domains:
Expand Down
4 changes: 2 additions & 2 deletions bbot/modules/bevigil.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,14 +60,14 @@ async def request_urls(self, query):
url = f"{self.base_url}/{self.helpers.quote(query)}/urls/"
return await self.api_request(url)

async def parse_subdomains(self, r, query=None):
    """Extract the set of subdomains reported in a BeVigil API response."""
    found = set()
    payload = r.json().get("subdomains")
    if payload:
        found.update(payload)
    return found

def parse_urls(self, r, query=None):
async def parse_urls(self, r, query=None):
results = set()
urls = r.json().get("urls")
if urls:
Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/binaryedge.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ async def request_url(self, query):
url = f"{self.base_url}/query/domains/subdomain/{self.helpers.quote(query)}"
return await self.api_request(url)

async def parse_results(self, r, query):
    """Return the raw list of subdomain events from a BinaryEdge response."""
    payload = r.json()
    return payload.get("events", [])
5 changes: 2 additions & 3 deletions bbot/modules/bufferoverrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ async def request_url(self, query):
url = f"{self.commercial_base_url if self.commercial else self.base_url}?q=.{query}"
return await self.api_request(url)

def parse_results(self, r, query):
async def parse_results(self, r, query):
j = r.json()
subdomains_set = set()
if isinstance(j, dict):
Expand All @@ -44,5 +44,4 @@ def parse_results(self, r, query):
subdomain = parts[4].strip()
if subdomain and subdomain.endswith(f".{query}"):
subdomains_set.add(subdomain)
for subdomain in subdomains_set:
yield subdomain
return subdomains_set
4 changes: 2 additions & 2 deletions bbot/modules/builtwith.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ async def request_redirects(self, query):
url = f"{self.base_url}/redirect1/api.json?KEY={{api_key}}&LOOKUP={query}"
return await self.api_request(url)

def parse_domains(self, r, query):
async def parse_domains(self, r, query):
"""
This method returns a set of subdomains.
Each subdomain is an "FQDN" that was reported in the "Detailed Technology Profile" page on builtwith.com
Expand Down Expand Up @@ -92,7 +92,7 @@ def parse_domains(self, r, query):
self.verbose(f"No results for {query}: {error}")
return results_set

def parse_redirects(self, r, query):
async def parse_redirects(self, r, query):
"""
This method creates a set.
Each entry in the set is either an Inbound or Outbound Redirect reported in the "Redirect Profile" page on builtwith.com
Expand Down
6 changes: 4 additions & 2 deletions bbot/modules/c99.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@ async def request_url(self, query):
url = f"{self.base_url}/subdomainfinder?key={{api_key}}&domain={self.helpers.quote(query)}&json"
return await self.api_request(url)

async def parse_results(self, r, query):
    """Collect non-empty subdomain names from a C99 subdomainfinder response."""
    found = set()
    payload = r.json()
    if isinstance(payload, dict):
        entries = payload.get("subdomains", [])
        if entries:
            for entry in entries:
                name = entry.get("subdomain", "")
                if name:
                    found.add(name)
    return found
6 changes: 4 additions & 2 deletions bbot/modules/certspotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ def request_url(self, query):
url = f"{self.base_url}/issuances?domain={self.helpers.quote(query)}&include_subdomains=true&expand=dns_names"
return self.api_request(url, timeout=self.http_timeout + 30)

async def parse_results(self, r, query):
    """Collect cleaned DNS names from a Certspotter issuances response.

    Fixes two shadowing issues in the original: the loop variable reused `r`
    (clobbering the HTTP response parameter) and the local `json` shadowed the
    commonly-imported module name.
    """
    results = set()
    issuances = r.json()
    if issuances:
        for issuance in issuances:
            for dns_name in issuance.get("dns_names", []):
                # Strip wildcard prefixes ("*.") and any trailing dot.
                results.add(dns_name.lstrip(".*").rstrip("."))
    return results
6 changes: 4 additions & 2 deletions bbot/modules/chaos.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ async def request_url(self, query):
url = f"{self.base_url}/{domain}/subdomains"
return await self.api_request(url)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
j = r.json()
subdomains_set = set()
if isinstance(j, dict):
Expand All @@ -39,4 +40,5 @@ def parse_results(self, r, query):
for s in subdomains_set:
full_subdomain = f"{s}.{domain}"
if full_subdomain and full_subdomain.endswith(f".{query}"):
yield full_subdomain
results.add(full_subdomain)
return results
2 changes: 1 addition & 1 deletion bbot/modules/columbus.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ async def request_url(self, query):
url = f"{self.base_url}/{self.helpers.quote(query)}?days=365"
return await self.api_request(url)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
json = r.json()
if json and isinstance(json, list):
Expand Down
6 changes: 4 additions & 2 deletions bbot/modules/crt.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ async def request_url(self, query):
url = self.helpers.add_get_params(self.base_url, params).geturl()
return await self.api_request(url, timeout=self.http_timeout + 30)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
j = r.json()
for cert_info in j:
if not type(cert_info) == dict:
Expand All @@ -35,4 +36,5 @@ def parse_results(self, r, query):
domain = cert_info.get("name_value")
if domain:
for d in domain.splitlines():
yield d.lower()
results.add(d.lower())
return results
2 changes: 1 addition & 1 deletion bbot/modules/digitorus.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ async def request_url(self, query):
url = f"{self.base_url}/{self.helpers.quote(query)}"
return await self.helpers.request(url)

def parse_results(self, r, query):
async def parse_results(self, r, query):
results = set()
content = getattr(r, "text", "")
extract_regex = re.compile(r"[\w.-]+\." + query, re.I)
Expand Down
6 changes: 3 additions & 3 deletions bbot/modules/dnscaa.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# Checks for and parses CAA DNS TXT records for IODEF reporting destination email addresses and/or URL's.
#
# NOTE: when the target domain is initially resolved basic "dns_name_regex" matched targets will be extracted so we do not perform that again here.
# NOTE: when the target domain is initially resolved basic "dns_name_extraction_regex" matched targets will be extracted so we do not perform that again here.
#
# Example CAA records,
# 0 iodef "mailto:[email protected]"
Expand All @@ -23,7 +23,7 @@

import re

from bbot.core.helpers.regexes import dns_name_regex, email_regex, url_regexes
from bbot.core.helpers.regexes import dns_name_extraction_regex, email_regex, url_regexes

# Handle '0 iodef "mailto:[email protected]"'
# Handle '1 iodef "https://some.host.tld/caa;"'
Expand Down Expand Up @@ -109,7 +109,7 @@ async def handle_event(self, event):

elif caa_match.group("property").lower().startswith("issue"):
if self._dns_names:
for match in dns_name_regex.finditer(caa_match.group("text")):
for match in dns_name_extraction_regex.finditer(caa_match.group("text")):
start, end = match.span()
name = caa_match.group("text")[start:end]

Expand Down
2 changes: 1 addition & 1 deletion bbot/modules/fullhunt.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,5 @@ async def request_url(self, query):
response = await self.api_request(url)
return response

async def parse_results(self, r, query):
    """Return the hosts list from a FullHunt domain-details response."""
    payload = r.json()
    return payload.get("hosts", [])
Loading
Loading