diff --git a/bbot/db/sql/models.py b/bbot/db/sql/models.py
new file mode 100644
index 0000000000..7677a181e2
--- /dev/null
+++ b/bbot/db/sql/models.py
@@ -0,0 +1,165 @@
+# This file contains SQLModel (Pydantic + SQLAlchemy) models for BBOT events, scans, and targets.
+# Used by the SQL output modules, but portable for outside use.
+
+import json
+import logging
+from datetime import datetime, timezone
+from pydantic import ConfigDict
+from typing import List, Optional
+from typing_extensions import Annotated
+from pydantic.functional_validators import AfterValidator
+from sqlmodel import inspect, Column, Field, SQLModel, JSON, String, DateTime as SQLADateTime
+
+
+log = logging.getLogger("bbot_server.models")
+
+
+def naive_datetime_validator(d: datetime):
+    """
+    Converts all dates into UTC, then drops timezone information.
+
+    This is needed to prevent inconsistencies in sqlite, because it is timezone-naive.
+    Datetimes that are already naive are left as-is.
+    """
+    # shift timezone-aware values to UTC first, otherwise dropping tzinfo keeps the local wall-clock time
+    if d.tzinfo is not None:
+        d = d.astimezone(timezone.utc)
+    # drop timezone info
+    return d.replace(tzinfo=None)
+
+
+NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)]
+
+
+class CustomJSONEncoder(json.JSONEncoder):
+    """JSON encoder that serializes datetime objects as ISO 8601 strings."""
+
+    def default(self, obj):
+        # handle datetime
+        if isinstance(obj, datetime):
+            return obj.isoformat()
+        return super().default(obj)
+
+
+class BBOTBaseModel(SQLModel):
+    """Shared base for the BBOT SQL models: ignores unknown fields and adds JSON, hash and equality helpers."""
+
+    model_config = ConfigDict(extra="ignore")
+
+    def __init__(self, *args, **kwargs):
+        self._validated = None
+        super().__init__(*args, **kwargs)
+
+    @property
+    def validated(self):
+        """Result of model_validate() on this instance (cached); falls back to the instance on AttributeError."""
+        try:
+            if self._validated is None:
+                self._validated = self.__class__.model_validate(self)
+            return self._validated
+        except AttributeError:
+            return self
+
+    def to_json(self, **kwargs):
+        """Dump the validated model to a JSON string with sorted keys; kwargs are passed to json.dumps()."""
+        return json.dumps(self.validated.model_dump(), sort_keys=True, cls=CustomJSONEncoder, **kwargs)
+
+    @classmethod
+    def _pk_column_names(cls):
+        return [column.name for column in inspect(cls).primary_key]
+
+    def __hash__(self):
+        # hash of the sorted-key JSON dump, so equal field values hash equally
+        return hash(self.to_json())
+
+    def __eq__(self, other):
+        return hash(self) == hash(other)
+
+
+### EVENT ###
+
+
+class Event(BBOTBaseModel, table=True):
+    """A BBOT event. `data` is always stored wrapped in a single-key dict: {event type: data}."""
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # unwrap first so that already-wrapped data isn't wrapped twice
+        data = self._get_data(self.data, self.type)
+        self.data = {self.type: data}
+        if self.host:
+            self.reverse_host = self.host[::-1]
+
+    def get_data(self):
+        """Return the event data without the {event type: ...} wrapper."""
+        return self._get_data(self.data, self.type)
+
+    @staticmethod
+    def _get_data(data, type):
+        # handle SIEM-friendly format
+        if isinstance(data, dict) and list(data) == [type]:
+            return data[type]
+        return data
+
+    uuid: str = Field(
+        primary_key=True,
+        index=True,
+        nullable=False,
+    )
+    id: str = Field(index=True)
+    type: str = Field(index=True)
+    scope_description: str
+    data: dict = Field(sa_type=JSON)
+    host: Optional[str]
+    port: Optional[int]
+    netloc: Optional[str]
+    # store the host in reversed form for efficient lookups by domain
+    reverse_host: Optional[str] = Field(default="", exclude=True, index=True)
+    resolved_hosts: List = Field(default=[], sa_type=JSON)
+    dns_children: dict = Field(default={}, sa_type=JSON)
+    web_spider_distance: int = 10
+    scope_distance: int = Field(default=10, index=True)
+    scan: str = Field(index=True)
+    timestamp: NaiveUTC = Field(index=True)
+    parent: str = Field(index=True)
+    tags: List = Field(default=[], sa_type=JSON)
+    module: str = Field(index=True)
+    module_sequence: str
+    discovery_context: str = ""
+    discovery_path: List[str] = Field(default=[], sa_type=JSON)
+    parent_chain: List[str] = Field(default=[], sa_type=JSON)
+
+
+### SCAN ###
+
+
+class Scan(BBOTBaseModel, table=True):
+    """A BBOT scan, keyed by scan id."""
+
+    id: str = Field(primary_key=True)
+    name: str
+    status: str
+    started_at: NaiveUTC = Field(index=True)
+    finished_at: Optional[NaiveUTC] = Field(default=None, sa_column=Column(SQLADateTime, nullable=True, index=True))
+    duration_seconds: Optional[float] = Field(default=None)
+    duration: Optional[str] = Field(default=None)
+    target: dict = Field(sa_type=JSON)
+    preset: dict = Field(sa_type=JSON)
+
+
+### TARGET ###
+
+
+class Target(BBOTBaseModel, table=True):
+    """A BBOT scan target, keyed by its hash."""
+
+    name: str = "Default Target"
+    strict_scope: bool = False
+    seeds: List = Field(default=[], sa_type=JSON)
+    whitelist: Optional[List] = Field(default=None, sa_type=JSON)
+    blacklist: List = Field(default=[], sa_type=JSON)
+    hash: str = Field(sa_column=Column("hash", String, unique=True, primary_key=True, index=True))
+    scope_hash: str = Field(sa_column=Column("scope_hash", String, index=True))
+    seed_hash: str = Field(sa_column=Column("seed_hash", String, index=True))
+    whitelist_hash: str = Field(sa_column=Column("whitelist_hash", String, index=True))
+    blacklist_hash: str = Field(sa_column=Column("blacklist_hash", String, index=True))
diff --git a/bbot/modules/output/sqlite.py b/bbot/modules/output/sqlite.py
new file mode 100644
index 0000000000..68ac60dafd
--- /dev/null
+++ b/bbot/modules/output/sqlite.py
@@ -0,0 +1,31 @@
+from pathlib import Path
+
+from bbot.modules.templates.sql import SQLTemplate
+
+
+class SQLite(SQLTemplate):
+    watched_events = ["*"]
+    meta = {"description": "Output scan data to a SQLite database"}
+    options = {
+        "database": "",
+    }
+    options_desc = {
+        "database": "The path to the sqlite database file",
+    }
+    deps_pip = ["sqlmodel", "sqlalchemy-utils", "aiosqlite"]
+
+    async def setup(self):
+        # default to output.sqlite in the scan home; relative paths are resolved against the scan home
+        db_file = self.config.get("database", "")
+        if not db_file:
+            db_file = self.scan.home / "output.sqlite"
+        db_file = Path(db_file)
+        if not db_file.is_absolute():
+            db_file = self.scan.home / db_file
+        self.db_file = db_file
+        self.db_file.parent.mkdir(parents=True, exist_ok=True)
+        return await super().setup()
+
+    def connection_string(self, mask_password=False):
+        # mask_password is accepted to match SQLTemplate.connection_string(); there is no password to mask
+        return f"sqlite+aiosqlite:///{self.db_file}"
diff --git a/bbot/modules/templates/sql.py b/bbot/modules/templates/sql.py
new file mode 100644
index 0000000000..b075753d3b
--- /dev/null
+++ b/bbot/modules/templates/sql.py
@@ -0,0 +1,98 @@
+from urllib.parse import quote_plus
+
+from sqlmodel import SQLModel
+from sqlalchemy.orm import sessionmaker
+from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
+from sqlalchemy_utils.functions import database_exists, create_database
+
+from bbot.db.sql.models import Event, Scan, Target
+from bbot.modules.output.base import BaseOutputModule
+
+
+class SQLTemplate(BaseOutputModule):
+    meta = {"description": "SQL output module template"}
+    options = {
+        "protocol": "",
+        "database": "bbot",
+        "username": "",
+        "password": "",
+        "host": "127.0.0.1",
+        "port": 0,
+    }
+    options_desc = {
+        "protocol": "The protocol to use to connect to the database",
+        "database": "The database to use",
+        "username": "The username to use to connect to the database",
+        "password": "The password to use to connect to the database",
+        "host": "The host to use to connect to the database",
+        "port": "The port to use to connect to the database",
+    }
+
+    async def setup(self):
+        # connection_string() reads self.protocol, so it has to be set here along with the other options;
+        # fall back to a class-level "protocol" attribute if the option is empty
+        self.protocol = self.config.get("protocol", "") or getattr(self, "protocol", "")
+        self.database = self.config.get("database", "bbot")
+        self.username = self.config.get("username", "")
+        self.password = self.config.get("password", "")
+        self.host = self.config.get("host", "127.0.0.1")
+        self.port = self.config.get("port", 0)
+
+        self.log.info(f"Connecting to {self.connection_string(mask_password=True)}")
+
+        self.engine = create_async_engine(self.connection_string())
+        # Create a session factory bound to the engine
+        self.async_session = sessionmaker(self.engine, expire_on_commit=False, class_=AsyncSession)
+        await self.init_database()
+        return True
+
+    async def handle_event(self, event):
+        event_obj = Event(**event.json()).validated
+
+        async with self.async_session() as session:
+            # session.begin() commits when the block exits cleanly and rolls back on error,
+            # so no explicit commit is needed
+            async with session.begin():
+                # insert event
+                session.add(event_obj)
+
+                # if it's a SCAN event, create/update the scan and target
+                if event_obj.type == "SCAN":
+                    event_data = event_obj.get_data()
+                    if not isinstance(event_data, dict):
+                        raise ValueError(f"Invalid data for SCAN event: {event_data}")
+                    scan = Scan(**event_data).validated
+                    await session.merge(scan)  # Insert or update scan
+
+                    target_data = event_data.get("target", {})
+                    if not isinstance(target_data, dict):
+                        raise ValueError(f"Invalid target for SCAN event: {target_data}")
+                    target = Target(**target_data).validated
+                    await session.merge(target)  # Insert or update target
+
+    async def init_database(self):
+        async with self.engine.begin() as conn:
+            # Check if the database exists using the connection's engine URL
+            if not await conn.run_sync(lambda sync_conn: database_exists(sync_conn.engine.url)):
+                await conn.run_sync(lambda sync_conn: create_database(sync_conn.engine.url))
+            # Create all tables
+            await conn.run_sync(SQLModel.metadata.create_all)
+
+    def connection_string(self, mask_password=False):
+        """
+        Build the database URL as protocol://[username:password@]host[:port][/database].
+
+        Credentials are percent-encoded so special characters can't break the URL.
+        With mask_password=True the password is replaced by "****" (for logging).
+        """
+        connection_string = f"{self.protocol}://"
+        if self.username:
+            password = "****" if mask_password else quote_plus(str(self.password))
+            connection_string += f"{quote_plus(str(self.username))}:{password}@"
+        if self.host:
+            connection_string += f"{self.host}"
+        if self.port:
+            connection_string += f":{self.port}"
+        if self.database:
+            connection_string += f"/{self.database}"
+        return connection_string
diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py
index 34ef29c389..9572153554 100644
--- a/bbot/scanner/scanner.py
+++ b/bbot/scanner/scanner.py
@@ -161,7 +161,7 @@ def __init__(
                 tries += 1
         else:
             scan_name = str(self.preset.scan_name)
-        self.name = scan_name
+        self.name = scan_name.replace("/", "_")
 
         # make sure the preset has a description
         if not self.preset.description:
diff --git a/bbot/test/test_step_2/module_tests/test_module_sqlite.py b/bbot/test/test_step_2/module_tests/test_module_sqlite.py
new file mode 100644
index 0000000000..809d68c47a
--- /dev/null
+++ b/bbot/test/test_step_2/module_tests/test_module_sqlite.py
@@ -0,0 +1,20 @@
+import sqlite3
+from contextlib import closing
+from .base import ModuleTestBase
+
+
+class TestSQLite(ModuleTestBase):
+    targets = ["evilcorp.com"]
+
+    def check(self, module_test, events):
+        sqlite_output_file = module_test.scan.home / "output.sqlite"
+        assert sqlite_output_file.exists(), "SQLite output file not found"
+        # sqlite3's own context manager only commits/rolls back; closing() actually closes the connection
+        with closing(sqlite3.connect(sqlite_output_file)) as db:
+            cursor = db.cursor()
+            cursor.execute("SELECT * FROM event")
+            assert len(cursor.fetchall()) > 0, "No events found in SQLite database"
+            cursor.execute("SELECT * FROM scan")
+            assert len(cursor.fetchall()) > 0, "No scans found in SQLite database"
+            cursor.execute("SELECT * FROM target")
+            assert len(cursor.fetchall()) > 0, "No targets found in SQLite database"
diff --git a/docs/release_history.md b/docs/release_history.md
index f93f7d785d..7fd343f513 100644
--- a/docs/release_history.md
+++ b/docs/release_history.md
@@ -1,4 +1,10 @@
-### 2.1.0
+### 2.1.2 - Nov 1, 2024
+- https://github.com/blacklanternsecurity/bbot/pull/1909
+
+### 2.1.1 - Oct 31, 2024
+- https://github.com/blacklanternsecurity/bbot/pull/1885
+
+### 2.1.0 - Oct 18, 2024
 - https://github.com/blacklanternsecurity/bbot/pull/1724
 
 ### 2.0.1 - Aug 29, 2024
diff --git a/docs/scanning/output.md b/docs/scanning/output.md
index 55eaa5469a..7efdf48620 100644
--- a/docs/scanning/output.md
+++ b/docs/scanning/output.md
@@ -178,6 +178,15 @@ The `asset_inventory` module produces a CSV like this:
 | www.evilcorp.com | cdn-github | 2.3.4.5 | Active | 22,80,443 |
 | admin.evilcorp.com | cloud-azure | 5.6.7.8 | N/A | |
 
+### SQLite
+
+The `sqlite` output module produces a SQLite database containing all events, scans, and targets. By default, it will be saved in the scan directory as `output.sqlite`.
+
+```bash
+# specifying a custom database path
+bbot -t evilcorp.com -om sqlite -c modules.sqlite.database=/tmp/bbot.sqlite
+```
+
 ### Subdomains
 
 The `subdomains` output module produces simple text file containing only in-scope and resolved subdomains:
diff --git a/mkdocs.yml b/mkdocs.yml
index a5c29b7574..1802fc678a 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -32,6 +32,7 @@ nav:
   - Modules:
     - List of Modules: modules/list_of_modules.md
     - Nuclei: modules/nuclei.md
+    - Custom YARA Rules: modules/custom_yara_rules.md
   - Misc:
     - Contribution: contribution.md
     - Release History: release_history.md