diff --git a/README.md b/README.md index ee5790ee5d..b979a1b6ee 100644 --- a/README.md +++ b/README.md @@ -383,6 +383,7 @@ For details, see [Configuration](https://www.blacklanternsecurity.com/bbot/Stabl - [List of Modules](https://www.blacklanternsecurity.com/bbot/Stable/modules/list_of_modules) - [Nuclei](https://www.blacklanternsecurity.com/bbot/Stable/modules/nuclei) - [Custom YARA Rules](https://www.blacklanternsecurity.com/bbot/Stable/modules/custom_yara_rules) + - [Lightfuzz](https://www.blacklanternsecurity.com/bbot/Stable/modules/lightfuzz) - **Misc** - [Contribution](https://www.blacklanternsecurity.com/bbot/Stable/contribution) - [Release History](https://www.blacklanternsecurity.com/bbot/Stable/release_history) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index f669c65ff7..3c22364d29 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -9,9 +9,9 @@ import ipaddress import traceback -from copy import copy from pathlib import Path from typing import Optional +from copy import copy, deepcopy from contextlib import suppress from radixtarget import RadixTarget from pydantic import BaseModel, field_validator @@ -40,6 +40,7 @@ validators, get_file_extension, ) +from bbot.core.helpers.web.envelopes import BaseEnvelope log = logging.getLogger("bbot.core.event") @@ -589,6 +590,10 @@ def parent(self, parent): elif not self._dummy: log.warning(f"Tried to set invalid parent on {self}: (got: {parent})") + @property + def children(self): + return [] + @property def parent_id(self): parent_id = getattr(self.get_parent(), "id", None) @@ -643,6 +648,13 @@ def get_parents(self, omit=False, include_self=False): e = parent return parents + def clone(self): + # Create a shallow copy of the event first + cloned_event = copy(self) + # Re-assign a new UUID + cloned_event._uuid = uuid.uuid4() + return cloned_event + def _host(self): return "" @@ -824,7 +836,13 @@ def json(self, mode="json", siem_friendly=False): j["discovery_path"] = self.discovery_path j["parent_chain"] = self.parent_chain + # parameter envelopes + parameter_envelopes = getattr(self, "envelopes", None) + if parameter_envelopes is not None: + j["envelopes"] = parameter_envelopes.to_dict() + # normalize non-primitive python objects + for k, v in list(j.items()): if k == "data": continue @@ -1307,12 +1325,56 @@ class URL_HINT(URL_UNVERIFIED): class WEB_PARAMETER(DictHostEvent): + @property + def children(self): + # if we have any subparams, raise a new WEB_PARAMETER for each one + children = [] + envelopes = getattr(self, "envelopes", None) + if envelopes is not None: + subparams = sorted(list(self.envelopes.get_subparams())) + + if envelopes.selected_subparam is None: + current_subparam = subparams[0] + envelopes.selected_subparam = current_subparam[0] + if len(subparams) > 1: + for subparam, _ in subparams[1:]: + clone = self.clone() + clone.envelopes = deepcopy(envelopes) + clone.envelopes.selected_subparam = subparam + clone.parent = self + children.append(clone) + return children + + def sanitize_data(self, data): + original_value = data.get("original_value", None) + if original_value is not None: + try: + envelopes = BaseEnvelope.detect(original_value) + setattr(self, "envelopes", envelopes) + except ValueError as e: + log.verbose(f"Error detecting envelopes for {self}: {e}") + return data + def _data_id(self): # dedupe by url:name:param_type url = self.data.get("url", "") name = self.data.get("name", "") param_type = self.data.get("type", "") - return f"{url}:{name}:{param_type}" + envelopes = getattr(self, "envelopes", "") + subparam = getattr(envelopes, "selected_subparam", "") + + return f"{url}:{name}:{param_type}:{subparam}" + + def _outgoing_dedup_hash(self, event): + return hash( + ( + str(event.host), + event.data["url"], + event.data.get("name", ""), + event.data.get("type", ""), + event.data.get("envelopes", ""), + ) + ) def _url(self): return self.data["url"] @@ -1730,7 +1792,6 @@ def make_event( data = net.network_address event_class = globals().get(event_type, DefaultEvent) - return event_class( data, event_type=event_type, @@ -1790,7 +1851,6 @@ def event_from_json(j, siem_friendly=False): resolved_hosts = j.get("resolved_hosts", []) event._resolved_hosts = set(resolved_hosts) - event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) event.scope_distance = j["scope_distance"] parent_id = j.get("parent", None) diff --git a/bbot/core/helpers/diff.py b/bbot/core/helpers/diff.py index ea7ca3a864..73f3adef12 100644 --- a/bbot/core/helpers/diff.py +++ b/bbot/core/helpers/diff.py @@ -15,22 +15,24 @@ def __init__( parent_helper, method="GET", data=None, + json=None, allow_redirects=False, include_cache_buster=True, headers=None, cookies=None, - timeout=15, + timeout=10, ): self.parent_helper = parent_helper self.baseline_url = baseline_url self.include_cache_buster = include_cache_buster self.method = method self.data = data + self.json = json self.allow_redirects = allow_redirects self._baselined = False self.headers = headers self.cookies = cookies - self.timeout = 15 + self.timeout = 10 @staticmethod def merge_dictionaries(headers1, headers2): @@ -53,6 +55,7 @@ async def _baseline(self): follow_redirects=self.allow_redirects, method=self.method, data=self.data, + json=self.json, headers=self.headers, cookies=self.cookies, retries=2, @@ -76,6 +79,7 @@ async def _baseline(self): follow_redirects=self.allow_redirects, method=self.method, data=self.data, + json=self.json, retries=2, timeout=self.timeout, ) @@ -103,11 +107,9 @@ async def _baseline(self): for k in ddiff.keys(): for x in list(ddiff[k]): - log.debug(f"Added {k} filter for path: {x.path()}") self.ddiff_filters.append(x.path()) self.baseline_json = baseline_1_json - self.baseline_ignore_headers = [ h.lower() for h in [ @@ -158,7 +160,6 @@ def compare_body(self, content_1, content_2): if len(ddiff.keys()) == 0: return True else: - log.debug(ddiff) return False async def compare( @@ -169,6 +170,7 @@ async def compare( check_reflection=False, method="GET", data=None, + json=None, allow_redirects=False, timeout=None, ): @@ -199,6 +201,7 @@ async def compare( follow_redirects=allow_redirects, method=method, data=data, + json=json, timeout=timeout, ) diff --git a/bbot/core/helpers/helper.py b/bbot/core/helpers/helper.py index 78ccf67155..a953746807 100644 --- a/bbot/core/helpers/helper.py +++ b/bbot/core/helpers/helper.py @@ -129,7 +129,8 @@ def http_compare( cookies=None, method="GET", data=None, - timeout=15, + json=None, + timeout=10, ): return HttpCompare( url, @@ -141,6 +142,7 @@ def http_compare( timeout=timeout, method=method, data=data, + json=json, ) def temp_filename(self, extension=None): diff --git a/bbot/core/helpers/misc.py b/bbot/core/helpers/misc.py index 92c9e523fd..688f9f599c 100644 --- a/bbot/core/helpers/misc.py +++ b/bbot/core/helpers/misc.py @@ -2,13 +2,16 @@ import sys import copy import json +import math import random import string import asyncio import logging import ipaddress +import ahocorasick import regex as re import subprocess as sp + from pathlib import Path from contextlib import suppress from unidecode import unidecode # noqa F401 @@ -797,17 +800,14 @@ def recursive_decode(data, max_depth=5): return data -rand_pool = string.ascii_lowercase -rand_pool_digits = rand_pool + string.digits - - -def rand_string(length=10, digits=True): +def rand_string(length=10, digits=True, numeric_only=False): """ Generates a random string of specified length. Args: length (int, optional): The length of the random string. Defaults to 10. digits (bool, optional): Whether to include digits in the string. Defaults to True. + numeric_only (bool, optional): Whether to generate a numeric-only string. Defaults to False. Returns: str: A random string of the specified length. @@ -819,11 +819,17 @@ def rand_string(length=10, digits=True): 'ap4rsdtg5iw7ey7y3oa5' >>> rand_string(30, digits=False) 'xdmyxtglqfzqktngkesyulwbfrihva' + >>> rand_string(15, numeric_only=True) + '934857349857395' """ - pool = rand_pool - if digits: - pool = rand_pool_digits - return "".join([random.choice(pool) for _ in range(int(length))]) + if numeric_only: + pool = string.digits + elif digits: + pool = string.ascii_lowercase + string.digits + else: + pool = string.ascii_lowercase + + return "".join(random.choice(pool) for _ in range(length)) def truncate_string(s, n): @@ -921,6 +927,7 @@ def extract_params_xml(xml_data, compare_mode="getparam"): "getparam": {chr(c) for c in range(33, 127) if chr(c) not in ":/?#[]@!$&'()*+,;="}, "postparam": {chr(c) for c in range(33, 127) if chr(c) not in ":/?#[]@!$&'()*+,;="}, "cookie": {chr(c) for c in range(33, 127) if chr(c) not in '()<>@,;:"/[]?={} \t'}, + "bodyjson": set(chr(c) for c in range(33, 127) if chr(c) not in ":/?#[]@!$&'()*+,;="), } @@ -2772,6 +2779,35 @@ def clean_dict(d, *key_names, fuzzy=False, exclude_keys=None, _prev_key=None): return d +def string_scan(substrings, text, case_insensitive=True): + automaton = ahocorasick.Automaton() + if case_insensitive: + substrings = [s.lower() for s in substrings] + text = text.lower() + for idx, substring in enumerate(substrings): + automaton.add_word(substring, (idx, substring)) + automaton.make_automaton() + found_substrings = [] + for end_index, (insert_order, original_value) in automaton.iter(text): + found_substrings.append(original_value) + return found_substrings + + +def calculate_entropy(data): + """Calculate the Shannon entropy of a byte sequence""" + if not data: + return 0 + frequency = {} + for byte in data: + if byte in frequency: + frequency[byte] += 1 + else: + frequency[byte] = 1 + data_len = len(data) + entropy = -sum((count / data_len) * math.log2(count / data_len) for count in frequency.values()) + return entropy + + top_ports_cache = None @@ -2825,3 +2861,15 @@ def clean_requirement(req_string): dist = distribution("bbot") return [clean_requirement(r) for r in dist.requires] + + +def is_printable(s): + """ + Check if a string is printable + """ + if not isinstance(s, str): + raise ValueError(f"Expected a string, got {type(s)}") + + # Exclude control characters that break display/printing + s = set(s) + return all(ord(c) >= 32 or c in "\t\n\r" for c in s) diff --git a/bbot/core/helpers/regexes.py b/bbot/core/helpers/regexes.py index adf8abb650..8e162a3262 100644 --- a/bbot/core/helpers/regexes.py +++ b/bbot/core/helpers/regexes.py @@ -111,25 +111,55 @@ # For use with excavate parameters extractor input_tag_regex = re.compile( - r"]+?name=[\"\']?([\.$\w]+)[\"\']?(?:[^>]*?value=[\"\']([=+\/\w]*)[\"\'])?[^>]*>" + r"]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?\svalue=[\"\']?([:%\-\._=+\/\w]*)[\"\']?[^>]*?>" ) -jquery_get_regex = re.compile(r"url:\s?[\"\'].+?\?(\w+)=") -jquery_post_regex = re.compile(r"\$.post\([\'\"].+[\'\"].+\{(.+)\}") +input_tag_regex2 = re.compile( + r"]*?\svalue=[\"\']?([:\-%\._=+\/\w]*)[\"\']?[^>]*?\sname=[\"\']?([\-\._=+\/\w]+)[\"\']?[^>]*?>" +) +input_tag_novalue_regex = re.compile(r"]*\bvalue=)[^>]*?name=[\"\']?([\-\._=+\/\w]*)[\"\']?[^>]*?>") +input_tag_novalue_regex = re.compile(r"]*\b\svalue=)[^>]*?\sname=[\"\']?([\-\._=+\/\w]*)[\"\']?[^>]*?>") +# jquery_get_regex = re.compile(r"url:\s?[\"\'].+?\?(\w+)=") +# jquery_get_regex = re.compile(r"\$.get\([\'\"].+[\'\"].+\{(.+)\}") +# jquery_post_regex = re.compile(r"\$.post\([\'\"].+[\'\"].+\{(.+)\}") a_tag_regex = re.compile(r"]*href=[\"\']([^\"\'?>]*)\?([^&\"\'=]+)=([^&\"\'=]+)") img_tag_regex = re.compile(r"]*src=[\"\']([^\"\'?>]*)\?([^&\"\'=]+)=([^&\"\'=]+)") get_form_regex = re.compile( - r" - +