Skip to content

Commit

Permalink
Merge branch 'dev' into firebase_bucket_bug
Browse files Browse the repository at this point in the history
  • Loading branch information
oj-sec authored Feb 20, 2024
2 parents cfde2db + 65907a3 commit 7647b28
Show file tree
Hide file tree
Showing 131 changed files with 2,574 additions and 1,475 deletions.
1 change: 1 addition & 0 deletions dependabot.yml → .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ updates:
directory: "/"
schedule:
interval: "weekly"
target-branch: "dev"
open-pull-requests-limit: 10
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
__pycache__/
.coverage*
80 changes: 54 additions & 26 deletions README.md

Large diffs are not rendered by default.

91 changes: 50 additions & 41 deletions bbot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import asyncio
import logging
import traceback
from aioconsole import ainput
from omegaconf import OmegaConf
from contextlib import suppress
from aioconsole import stream

# fix tee buffering
sys.stdout.reconfigure(line_buffering=True)
Expand All @@ -20,6 +20,7 @@
from bbot import __version__
from bbot.modules import module_loader
from bbot.core.configurator.args import parser
from bbot.core.helpers.misc import smart_decode
from bbot.core.helpers.logger import log_to_stderr
from bbot.core.configurator import ensure_config_files, check_cli_args, environ

Expand Down Expand Up @@ -88,8 +89,6 @@ async def _main():
sys.exit(0)
return

log.verbose(f'Command: {" ".join(sys.argv)}')

if options.agent_mode:
from bbot.agent import Agent

Expand Down Expand Up @@ -303,46 +302,56 @@ async def _main():

if not options.dry_run:
log.trace(f"Command: {' '.join(sys.argv)}")
if not options.agent_mode and not options.yes and sys.stdin.isatty():
log.hugesuccess(f"Scan ready. Press enter to execute {scanner.name}")
input()

def handle_keyboard_input(keyboard_input):
kill_regex = re.compile(r"kill (?P<module>[a-z0-9_]+)")
if keyboard_input:
log.verbose(f'Got keyboard input: "{keyboard_input}"')
kill_match = kill_regex.match(keyboard_input)
if kill_match:
module = kill_match.group("module")
if module in scanner.modules:
log.hugewarning(f'Killing module: "{module}"')
scanner.manager.kill_module(module, message="killed by user")
else:
log.warning(f'Invalid module: "{module}"')
else:
toggle_log_level(logger=log)
scanner.manager.modules_status(_log=True)

async def akeyboard_listen():
allowed_errors = 10
while 1:
keyboard_input = "a"
if sys.stdin.isatty():
if not options.agent_mode and not options.yes:
log.hugesuccess(f"Scan ready. Press enter to execute {scanner.name}")
input()

def handle_keyboard_input(keyboard_input):
    """
    React to one line of operator input received while a scan is running.

    - Empty input toggles the log level and logs the status of the modules.
    - Input beginning with "kill <module>" kills that module when the scanner
      has it loaded, and logs a warning when it does not.
    - Any other non-empty input is only logged at verbose level.

    NOTE(review): this block was reconstructed from a view that had lost its
    indentation; the trailing `else` is assumed to pair with the emptiness
    check on `keyboard_input` -- confirm against the real file.

    Parameters:
        keyboard_input (str): The line typed by the user, already stripped/decoded by the caller.
    """
    kill_pattern = re.compile(r"kill (?P<module>[a-z0-9_]+)")

    # bare "enter": cycle verbosity and show what the modules are doing
    if not keyboard_input:
        toggle_log_level(logger=log)
        scanner.manager.modules_status(_log=True)
        return

    log.verbose(f'Got keyboard input: "{keyboard_input}"')

    kill_match = kill_pattern.match(keyboard_input)
    if not kill_match:
        # not a command we understand; nothing more to do
        return

    module = kill_match.group("module")
    if module not in scanner.modules:
        log.warning(f'Invalid module: "{module}"')
        return

    log.hugewarning(f'Killing module: "{module}"')
    scanner.manager.kill_module(module, message="killed by user")

# Reader
reader = stream.StandardStreamReader()
protocol = stream.StandardStreamReaderProtocol(reader)
await asyncio.get_event_loop().connect_read_pipe(lambda: protocol, sys.stdin)

async def akeyboard_listen():
try:
keyboard_input = await ainput()
except Exception:
allowed_errors -= 1
handle_keyboard_input(keyboard_input)
if allowed_errors <= 0:
break

try:
keyboard_listen_task = asyncio.create_task(akeyboard_listen())

await scanner.async_start_without_generator()
finally:
keyboard_listen_task.cancel()
with suppress(asyncio.CancelledError):
await keyboard_listen_task
allowed_errors = 10
while 1:
keyboard_input = None
try:
keyboard_input = smart_decode((await reader.readline()).strip())
allowed_errors = 10
except Exception as e:
log_to_stderr(f"Error in keyboard listen loop: {e}", level="TRACE")
log_to_stderr(traceback.format_exc(), level="TRACE")
allowed_errors -= 1
if keyboard_input is not None:
handle_keyboard_input(keyboard_input)
if allowed_errors <= 0:
break
except Exception as e:
log_to_stderr(f"Error in keyboard listen task: {e}", level="ERROR")
log_to_stderr(traceback.format_exc(), level="TRACE")

asyncio.create_task(akeyboard_listen())

await scanner.async_start_without_generator()

except bbot.core.errors.ScanError as e:
log_to_stderr(str(e), level="ERROR")
Expand Down
85 changes: 70 additions & 15 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
import json
import asyncio
import logging
Expand All @@ -6,25 +7,29 @@
from typing import Optional
from datetime import datetime
from contextlib import suppress
from urllib.parse import urljoin
from pydantic import BaseModel, field_validator

from .helpers import *
from bbot.core.errors import *
from bbot.core.helpers import (
extract_words,
split_host_port,
get_file_extension,
host_in_host,
is_domain,
is_subdomain,
is_ip,
is_ptr,
is_uri,
domain_stem,
make_netloc,
make_ip_type,
recursive_decode,
smart_decode,
get_file_extension,
validators,
split_host_port,
tagify,
validators,
truncate_string,
)


Expand Down Expand Up @@ -485,7 +490,7 @@ def data_human(self):
return self._data_human()

def _data_human(self):
return str(self.data)
return truncate_string(str(self.data), n=2000)

def _data_load(self, data):
"""
Expand Down Expand Up @@ -560,7 +565,7 @@ def __contains__(self, other):
return host_in_host(other.host, self.host)
return False

def json(self, mode="json"):
def json(self, mode="json", siem_friendly=False):
"""
Serializes the event object to a JSON-compatible dictionary.
Expand All @@ -569,6 +574,7 @@ def json(self, mode="json"):
Parameters:
mode (str): Specifies the data serialization mode. Default is "json". Other options include "graph", "human", and "id".
siem_friendly (bool): Whether to format the JSON in a way that's friendly to SIEM ingestion by Elastic, Splunk, etc. This ensures the value of "data" is always the same type (a dictionary).
Returns:
dict: JSON-serializable dictionary representation of the event object.
Expand All @@ -580,9 +586,13 @@ def json(self, mode="json"):
j.update({i: v})
data_attr = getattr(self, f"data_{mode}", None)
if data_attr is not None:
j["data"] = data_attr
data = data_attr
else:
j["data"] = smart_decode(self.data)
data = smart_decode(self.data)
if siem_friendly:
j["data"] = {self.type: data}
else:
j["data"] = data
web_spider_distance = getattr(self, "web_spider_distance", None)
if web_spider_distance is not None:
j["web_spider_distance"] = web_spider_distance
Expand Down Expand Up @@ -866,6 +876,8 @@ def _words(self):


class URL_UNVERIFIED(BaseEvent):
_status_code_regex = re.compile(r"^status-(\d{1,3})$")

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.web_spider_distance = getattr(self.source, "web_spider_distance", 0)
Expand Down Expand Up @@ -921,6 +933,14 @@ def _data_id(self):
data = "spider-danger" + data
return data

@property
def http_status(self):
    """
    HTTP status code recorded in this event's tags.

    Each tag is tested against `_status_code_regex`; the first tag that
    matches supplies the code.

    Returns:
        int: The captured status code, or 0 when no tag matches.
    """
    # lazily match tags so we stop at the first hit, like an early-return loop
    attempts = map(self._status_code_regex.match, self.tags)
    first_hit = next(filter(None, attempts), None)
    if first_hit is None:
        return 0
    return int(first_hit.group(1))


class URL(URL_UNVERIFIED):
def sanitize_data(self, data):
Expand Down Expand Up @@ -973,7 +993,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# count number of consecutive redirects
self.num_redirects = getattr(self.source, "num_redirects", 0)
if str(self.data.get("status_code", 0)).startswith("3"):
if str(self.http_status).startswith("3"):
self.num_redirects += 1

def sanitize_data(self, data):
Expand Down Expand Up @@ -1001,6 +1021,34 @@ def _words(self):
def _pretty_string(self):
return f'{self.data["hash"]["header_mmh3"]}:{self.data["hash"]["body_mmh3"]}'

@property
def http_status(self):
    """
    HTTP status code taken from the "status_code" key of the event data.

    Returns:
        int: The status code converted to an integer, or 0 when the key is
        missing or its value cannot be converted.
    """
    # a value that int() rejects falls through to the default below
    with suppress(ValueError, TypeError):
        return int(self.data.get("status_code", 0))
    return 0

@property
def http_title(self):
    """
    Page title stored under the "title" key of the event data.

    The raw value (an empty string when the key is absent) is passed through
    `recursive_decode`. If decoding raises, the raw value is returned as-is.
    """
    raw_title = self.data.get("title", "")
    try:
        decoded_title = recursive_decode(raw_title)
    except Exception:
        # best effort: an undecodable title is still worth returning
        decoded_title = raw_title
    return decoded_title

@property
def redirect_location(self):
    """
    Redirect target taken from the "location" key of the event data.

    Returns:
        The location value. When it is non-empty and `is_uri` reports no
        scheme for it (i.e. it's a relative redirect), it is first joined onto
        this event's own URL (`self.parsed.geturl()`). A missing or empty
        location is returned unchanged.
    """
    target = self.data.get("location", "")

    # not a redirect: nothing to resolve
    if not target:
        return target

    # the location already carries its own scheme, so it stands on its own
    if is_uri(target, return_scheme=True):
        return target

    # relative redirect: resolve it against the current url
    return urljoin(self.parsed.geturl(), target)


class VULNERABILITY(DictHostEvent):
_always_emit = True
Expand Down Expand Up @@ -1123,6 +1171,7 @@ class SOCIAL(DictEvent):

class WEBSCREENSHOT(DictHostEvent):
_always_emit = True
_quick_emit = True


class AZURE_TENANT(DictEvent):
Expand Down Expand Up @@ -1203,10 +1252,11 @@ def make_event(
"""

# allow tags to be either a string or an array
if tags is not None:
if isinstance(tags, str):
tags = [tags]
tags = list(tags)
if not tags:
tags = []
elif isinstance(tags, str):
tags = [tags]
tags = list(tags)

if is_event(data):
if scan is not None and not data.scan:
Expand Down Expand Up @@ -1267,7 +1317,7 @@ def make_event(
)


def event_from_json(j):
def event_from_json(j, siem_friendly=False):
"""
Creates an event object from a JSON dictionary.
Expand All @@ -1290,14 +1340,19 @@ def event_from_json(j):
if required keys are missing. Make sure to validate the JSON input beforehand.
"""
try:
event_type = j["type"]
kwargs = {
"data": j["data"],
"event_type": j["type"],
"event_type": event_type,
"scans": j.get("scans", []),
"tags": j.get("tags", []),
"confidence": j.get("confidence", 5),
"dummy": True,
}
if siem_friendly:
data = j["data"][event_type]
else:
data = j["data"]
kwargs["data"] = data
event = make_event(**kwargs)

resolved_hosts = j.get("resolved_hosts", [])
Expand Down
Loading

0 comments on commit 7647b28

Please sign in to comment.