Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Module: Elastic Output #2010

Merged
merged 13 commits into from
Nov 22, 2024
36 changes: 22 additions & 14 deletions bbot/core/event/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from copy import copy
from pathlib import Path
from typing import Optional
from zoneinfo import ZoneInfo
from contextlib import suppress
from radixtarget import RadixTarget
from urllib.parse import urljoin, parse_qs
Expand Down Expand Up @@ -40,6 +41,7 @@
validators,
get_file_extension,
)
from bbot.models.helpers import utc_datetime_validator


log = logging.getLogger("bbot.core.event")
Expand Down Expand Up @@ -755,7 +757,7 @@ def __contains__(self, other):
return bool(radixtarget.search(other.host))
return False

def json(self, mode="json", siem_friendly=False):
def json(self, mode="json"):
"""
Serializes the event object to a JSON-compatible dictionary.

Expand All @@ -764,7 +766,6 @@ def json(self, mode="json", siem_friendly=False):

Parameters:
mode (str): Specifies the data serialization mode. Default is "json". Other options include "graph", "human", and "id".
siem_friendly (bool): Whether to format the JSON in a way that's friendly to SIEM ingestion by Elastic, Splunk, etc. This ensures the value of "data" is always the same type (a dictionary).

Returns:
dict: JSON-serializable dictionary representation of the event object.
Expand All @@ -781,10 +782,12 @@ def json(self, mode="json", siem_friendly=False):
data = data_attr
else:
data = smart_decode(self.data)
if siem_friendly:
j["data"] = {self.type: data}
else:
if isinstance(data, str):
j["data"] = data
elif isinstance(data, dict):
j["data_json"] = data
else:
raise ValueError(f"Invalid data type: {type(data)}")
# host, dns children
if self.host:
j["host"] = str(self.host)
Expand All @@ -802,7 +805,7 @@ def json(self, mode="json", siem_friendly=False):
if self.scan:
j["scan"] = self.scan.id
# timestamp
j["timestamp"] = self.timestamp.isoformat()
j["timestamp"] = utc_datetime_validator(self.timestamp).timestamp()
# parent event
parent_id = self.parent_id
if parent_id:
Expand All @@ -811,8 +814,7 @@ def json(self, mode="json", siem_friendly=False):
if parent_uuid:
j["parent_uuid"] = parent_uuid
# tags
if self.tags:
j.update({"tags": list(self.tags)})
j.update({"tags": list(self.tags)})
# parent module
if self.module:
j.update({"module": str(self.module)})
Expand Down Expand Up @@ -1728,7 +1730,7 @@ def make_event(
)


def event_from_json(j, siem_friendly=False):
def event_from_json(j):
"""
Creates an event object from a JSON dictionary.

Expand Down Expand Up @@ -1760,10 +1762,12 @@ def event_from_json(j, siem_friendly=False):
"context": j.get("discovery_context", None),
"dummy": True,
}
if siem_friendly:
data = j["data"][event_type]
else:
data = j["data"]
data = j.get("data_json", None)
if data is None:
data = j.get("data", None)
if data is None:
json_pretty = json.dumps(j, indent=2)
raise ValueError(f"data or data_json must be provided. JSON: {json_pretty}")
kwargs["data"] = data
event = make_event(**kwargs)
event_uuid = j.get("uuid", None)
Expand All @@ -1773,7 +1777,11 @@ def event_from_json(j, siem_friendly=False):
resolved_hosts = j.get("resolved_hosts", [])
event._resolved_hosts = set(resolved_hosts)

event.timestamp = datetime.datetime.fromisoformat(j["timestamp"])
# accept both isoformat and unix timestamp
try:
event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"], ZoneInfo("UTC"))
except Exception:
event.timestamp = datetime.datetime.fromisoformat(j["timestamp"])
event.scope_distance = j["scope_distance"]
parent_id = j.get("parent", None)
if parent_id is not None:
Expand Down
20 changes: 20 additions & 0 deletions bbot/models/helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from datetime import datetime
from zoneinfo import ZoneInfo


def utc_datetime_validator(d: datetime) -> datetime:
    """
    Normalize a datetime to UTC.

    Aware datetimes are converted to UTC; naive ones are assumed to
    already represent UTC and are simply tagged with the UTC zone.
    """
    utc = ZoneInfo("UTC")
    return d.astimezone(utc) if d.tzinfo is not None else d.replace(tzinfo=utc)


def utc_now() -> datetime:
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(ZoneInfo("UTC"))


def utc_now_timestamp() -> float:
    """Return the current UTC time as a Unix timestamp (seconds since the epoch).

    Fix: the original annotated the return type as ``datetime``, but
    ``datetime.timestamp()`` returns a ``float``.
    """
    return utc_now().timestamp()
131 changes: 131 additions & 0 deletions bbot/models/pydantic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import json
import logging
from typing import Optional, List, Union, Annotated

from pydantic import BaseModel, ConfigDict, Field

from bbot.models.helpers import utc_now_timestamp

log = logging.getLogger("bbot_server.models")


class BBOTBaseModel(BaseModel):
    """
    Common base for all BBOT pydantic models.

    Provides hashing/equality based on a stable JSON serialization of the
    model, plus a helper for discovering fields marked as indexed.
    """

    model_config = ConfigDict(extra="ignore")

    def to_json(self, **kwargs):
        """
        Serialize the model to a JSON string with sorted keys, so that equal
        models always produce byte-identical output.

        Fix: __hash__ called self.to_json(), but no such method was defined
        anywhere in this module, so hash()/== raised AttributeError. This
        implementation mirrors the sibling SQL model's to_json().
        """
        return json.dumps(self.model_dump(), sort_keys=True, **kwargs)

    def __hash__(self):
        # Hash the canonical JSON form so equal models hash equally.
        return hash(self.to_json())

    def __eq__(self, other):
        return hash(self) == hash(other)

    @classmethod
    def _indexed_fields(cls):
        # Fields tagged with the "indexed" marker in their Annotated[...] metadata.
        return sorted(field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata)

    # we keep these because they were a lot of work to make and maybe someday they'll be useful again

    # @classmethod
    # def _get_type_hints(cls):
    #     """
    #     Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint
    #     """
    #     type_hints = get_type_hints(cls)
    #     unwrapped_type_hints = {}
    #     for field_name in cls.model_fields:
    #         type_hint = type_hints[field_name]
    #         while 1:
    #             if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union):
    #                 type_hint = type_hint.__args__[0]
    #             else:
    #                 break
    #         unwrapped_type_hints[field_name] = type_hint
    #     return unwrapped_type_hints

    # @classmethod
    # def _datetime_fields(cls):
    #     datetime_fields = []
    #     for field_name, type_hint in cls._get_type_hints().items():
    #         if type_hint == datetime:
    #             datetime_fields.append(field_name)
    #     return sorted(datetime_fields)


### EVENT ###


class Event(BBOTBaseModel):
    """Pydantic model of a single BBOT event as stored/transported by output modules."""

    uuid: Annotated[str, "indexed", "unique"]
    id: Annotated[str, "indexed"]
    type: Annotated[str, "indexed"]
    scope_description: str
    data: Annotated[Optional[str], "indexed"] = None
    data_json: Optional[dict] = None
    host: Annotated[Optional[str], "indexed"] = None
    port: Optional[int] = None
    netloc: Optional[str] = None
    # the host is also stored reversed: indexes are left-anchored, and subdomain
    # queries need to match from the right-hand side of the hostname
    reverse_host: Annotated[Optional[str], "indexed"] = ""
    resolved_hosts: Union[List, None] = None
    dns_children: Union[dict, None] = None
    web_spider_distance: int = 10
    scope_distance: int = 10
    scan: Annotated[str, "indexed"]
    timestamp: Annotated[float, "indexed"]
    inserted_at: Annotated[Optional[float], "indexed"] = Field(default_factory=utc_now_timestamp)
    parent: Annotated[str, "indexed"]
    parent_uuid: Annotated[str, "indexed"]
    tags: List = []
    module: Annotated[Optional[str], "indexed"] = None
    module_sequence: Optional[str] = None
    discovery_context: str = ""
    discovery_path: List[str] = []
    parent_chain: List[str] = []

    def __init__(self, **data):
        """Validate the fields, then derive reverse_host from host."""
        super().__init__(**data)
        host = self.host
        if host:
            self.reverse_host = host[::-1]

    def get_data(self):
        """Return the event's data: the string form when present, otherwise the JSON (dict) form."""
        return self.data_json if self.data is None else self.data


### SCAN ###


class Scan(BBOTBaseModel):
    """Pydantic model describing a single BBOT scan and its lifecycle timing."""

    id: Annotated[str, "indexed", "unique"]
    name: str
    status: Annotated[str, "indexed"]
    started_at: Annotated[float, "indexed"]
    finished_at: Annotated[Optional[float], "indexed"] = None
    duration_seconds: Optional[float] = None
    duration: Optional[str] = None
    target: dict
    preset: dict

    @classmethod
    def from_scan(cls, scan):
        """Build a Scan model from a live bbot scan object (only the fields known at start time)."""
        attrs = {
            "id": scan.id,
            "name": scan.name,
            "status": scan.status,
            "started_at": scan.started_at,
        }
        return cls(**attrs)


### TARGET ###


class Target(BBOTBaseModel):
    """Pydantic model of a scan target: seeds plus whitelist/blacklist scope, identified by content hashes."""

    name: str = "Default Target"
    strict_scope: bool = False
    seeds: List = []
    whitelist: List = []
    blacklist: List = []
    # content hashes identify a target configuration; "hash" covers the whole
    # target, the others cover individual scope components for partial lookups
    hash: Annotated[str, "indexed", "unique"]
    scope_hash: Annotated[str, "indexed"]
    seed_hash: Annotated[str, "indexed"]
    whitelist_hash: Annotated[str, "indexed"]
    blacklist_hash: Annotated[str, "indexed"]
36 changes: 12 additions & 24 deletions bbot/db/sql/models.py → bbot/models/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@

import json
import logging
from datetime import datetime
from pydantic import ConfigDict
from typing import List, Optional
from datetime import datetime, timezone
from typing_extensions import Annotated
from pydantic.functional_validators import AfterValidator
from sqlmodel import inspect, Column, Field, SQLModel, JSON, String, DateTime as SQLADateTime

from bbot.models.helpers import utc_now_timestamp


log = logging.getLogger("bbot_server.models")

Expand All @@ -27,14 +29,6 @@ def naive_datetime_validator(d: datetime):
NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)]


class CustomJSONEncoder(json.JSONEncoder):
def default(self, obj):
# handle datetime
if isinstance(obj, datetime):
return obj.isoformat()
return super().default(obj)


class BBOTBaseModel(SQLModel):
model_config = ConfigDict(extra="ignore")

Expand All @@ -52,7 +46,7 @@ def validated(self):
return self

def to_json(self, **kwargs):
return json.dumps(self.validated.model_dump(), sort_keys=True, cls=CustomJSONEncoder, **kwargs)
return json.dumps(self.validated.model_dump(), sort_keys=True, **kwargs)

@classmethod
def _pk_column_names(cls):
Expand All @@ -72,20 +66,13 @@ class Event(BBOTBaseModel, table=True):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
data = self._get_data(self.data, self.type)
self.data = {self.type: data}
if self.host:
self.reverse_host = self.host[::-1]

def get_data(self):
return self._get_data(self.data, self.type)

@staticmethod
def _get_data(data, type):
# handle SIEM-friendly format
if isinstance(data, dict) and list(data) == [type]:
return data[type]
return data
if self.data is not None:
return self.data
return self.data_json

uuid: str = Field(
primary_key=True,
Expand All @@ -94,27 +81,28 @@ def _get_data(data, type):
)
id: str = Field(index=True)
type: str = Field(index=True)
scope_description: str
data: dict = Field(sa_type=JSON)
data: Optional[str] = Field(default=None, index=True)
data_json: Optional[dict] = Field(default=None, sa_type=JSON)
host: Optional[str]
port: Optional[int]
netloc: Optional[str]
scope_description: str
# store the host in reversed form for efficient lookups by domain
reverse_host: Optional[str] = Field(default="", exclude=True, index=True)
resolved_hosts: List = Field(default=[], sa_type=JSON)
dns_children: dict = Field(default={}, sa_type=JSON)
web_spider_distance: int = 10
scope_distance: int = Field(default=10, index=True)
scan: str = Field(index=True)
timestamp: NaiveUTC = Field(index=True)
timestamp: float = Field(index=True)
inserted_at: float = Field(default_factory=utc_now_timestamp)
parent: str = Field(index=True)
tags: List = Field(default=[], sa_type=JSON)
module: str = Field(index=True)
module_sequence: str
discovery_context: str = ""
discovery_path: List[str] = Field(default=[], sa_type=JSON)
parent_chain: List[str] = Field(default=[], sa_type=JSON)
inserted_at: NaiveUTC = Field(default_factory=lambda: datetime.now(timezone.utc))


### SCAN ###
Expand Down
22 changes: 22 additions & 0 deletions bbot/modules/output/elastic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from .http import HTTP


class Elastic(HTTP):
    """Output module that sends every scan event to an Elasticsearch index, reusing the generic HTTP output module's delivery logic."""

    # subscribe to all event types so the full scan output reaches Elastic
    watched_events = ["*"]
    metadata = {
        "description": "Send scan results to Elasticsearch",
        # NOTE(review): the PR adding this module merged 2024-11-22 — this 2022
        # date looks like a typo for 2024-11-21; confirm against module history
        "created_date": "2022-11-21",
        "author": "@TheTechromancer",
    }
    options = {
        "url": "",
        "username": "elastic",
        "password": "changeme",
        "timeout": 10,
    }
    options_desc = {
        "url": "Elastic URL (e.g. https://localhost:9200/<your_index>/_doc)",
        "username": "Elastic username",
        "password": "Elastic password",
        "timeout": "HTTP timeout",
    }
Loading
Loading