diff --git a/.github/workflows/distro_tests.yml b/.github/workflows/distro_tests.yml index 95f9d7b5f..4e3f268d7 100644 --- a/.github/workflows/distro_tests.yml +++ b/.github/workflows/distro_tests.yml @@ -24,17 +24,17 @@ jobs: if [ "$ID" = "ubuntu" ] || [ "$ID" = "debian" ] || [ "$ID" = "kali" ] || [ "$ID" = "parrotsec" ]; then export DEBIAN_FRONTEND=noninteractive apt-get update - apt-get -y install curl git bash build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev + apt-get -y install curl git bash build-essential docker.io libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev elif [ "$ID" = "alpine" ]; then - apk add --no-cache bash gcc g++ musl-dev libffi-dev curl git make openssl-dev bzip2-dev zlib-dev xz-dev sqlite-dev + apk add --no-cache bash gcc g++ musl-dev libffi-dev docker curl git make openssl-dev bzip2-dev zlib-dev xz-dev sqlite-dev elif [ "$ID" = "arch" ]; then - pacman -Syu --noconfirm curl git bash base-devel + pacman -Syu --noconfirm curl docker git bash base-devel elif [ "$ID" = "fedora" ]; then - dnf install -y curl git bash gcc make openssl-devel bzip2-devel libffi-devel zlib-devel xz-devel tk-devel gdbm-devel readline-devel sqlite-devel python3-libdnf5 + dnf install -y curl docker git bash gcc make openssl-devel bzip2-devel libffi-devel zlib-devel xz-devel tk-devel gdbm-devel readline-devel sqlite-devel python3-libdnf5 elif [ "$ID" = "gentoo" ]; then echo "media-libs/libglvnd X" >> /etc/portage/package.use/libglvnd emerge-webrsync - emerge --update --newuse dev-vcs/git media-libs/mesa curl bash + emerge --update --newuse dev-vcs/git media-libs/mesa curl docker bash fi fi diff --git a/bbot/db/sql/models.py b/bbot/db/sql/models.py index 7677a181e..b15f4abfa 100644 --- a/bbot/db/sql/models.py +++ b/bbot/db/sql/models.py @@ -3,9 +3,9 @@ import json 
import logging -from datetime import datetime from pydantic import ConfigDict from typing import List, Optional +from datetime import datetime, timezone from typing_extensions import Annotated from pydantic.functional_validators import AfterValidator from sqlmodel import inspect, Column, Field, SQLModel, JSON, String, DateTime as SQLADateTime @@ -114,6 +114,7 @@ def _get_data(data, type): discovery_context: str = "" discovery_path: List[str] = Field(default=[], sa_type=JSON) parent_chain: List[str] = Field(default=[], sa_type=JSON) + inserted_at: NaiveUTC = Field(default_factory=lambda: datetime.now(timezone.utc)) ### SCAN ### diff --git a/bbot/modules/output/postgres.py b/bbot/modules/output/postgres.py new file mode 100644 index 000000000..b1c8c2659 --- /dev/null +++ b/bbot/modules/output/postgres.py @@ -0,0 +1,49 @@ +from bbot.modules.templates.sql import SQLTemplate + + +class Postgres(SQLTemplate): + watched_events = ["*"] + meta = {"description": "Output scan data to a PostgreSQL database"} + options = { + "username": "postgres", + "password": "bbotislife", + "host": "localhost", + "port": 5432, + "database": "bbot", + } + options_desc = { + "username": "The username to connect to Postgres", + "password": "The password to connect to Postgres", + "host": "The server running Postgres", + "port": "The port to connect to Postgres", + "database": "The database name to connect to", + } + deps_pip = ["sqlmodel", "asyncpg"] + protocol = "postgresql+asyncpg" + + async def create_database(self): + import asyncpg + from sqlalchemy import text + from sqlalchemy.ext.asyncio import create_async_engine + + # Create the engine for the initial connection to the server + initial_engine = create_async_engine(self.connection_string().rsplit("/", 1)[0]) + + async with initial_engine.connect() as conn: + # Check if the database exists + result = await conn.execute(text(f"SELECT 1 FROM pg_database WHERE datname = '{self.database}'")) + database_exists = result.scalar() is not None + 
# Create the database if it does not exist + if not database_exists: + # Use asyncpg directly to create the database + raw_conn = await asyncpg.connect( + user=self.username, + password=self.password, + host=self.host, + port=self.port, + ) + try: + await raw_conn.execute(f"CREATE DATABASE {self.database}") + finally: + await raw_conn.close() diff --git a/bbot/modules/output/sqlite.py b/bbot/modules/output/sqlite.py index 68ac60daf..5926c961e 100644 --- a/bbot/modules/output/sqlite.py +++ b/bbot/modules/output/sqlite.py @@ -12,7 +12,7 @@ class SQLite(SQLTemplate): options_desc = { "database": "The path to the sqlite database file", } - deps_pip = ["sqlmodel", "sqlalchemy-utils", "aiosqlite"] + deps_pip = ["sqlmodel", "aiosqlite"] async def setup(self): db_file = self.config.get("database", "") diff --git a/bbot/modules/templates/sql.py b/bbot/modules/templates/sql.py index b075753d3..fa00ad828 100644 --- a/bbot/modules/templates/sql.py +++ b/bbot/modules/templates/sql.py @@ -1,7 +1,6 @@ from sqlmodel import SQLModel from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession -from sqlalchemy_utils.functions import database_exists, create_database from bbot.db.sql.models import Event, Scan, Target from bbot.modules.output.base import BaseOutputModule @@ -10,7 +9,6 @@ class SQLTemplate(BaseOutputModule): meta = {"description": "SQL output module template"} options = { - "protocol": "", "database": "bbot", "username": "", "password": "", @@ -18,7 +16,6 @@ class SQLTemplate(BaseOutputModule): "port": 0, } options_desc = { - "protocol": "The protocol to use to connect to the database", "database": "The database to use", "username": "The username to use to connect to the database", "password": "The password to use to connect to the database", @@ -26,6 +23,8 @@ class SQLTemplate(BaseOutputModule): "port": "The port to use to connect to the database", } + protocol = "" + async def setup(self): self.database = 
self.config.get("database", "bbot") self.username = self.config.get("username", "") @@ -33,11 +32,6 @@ async def setup(self): self.host = self.config.get("host", "127.0.0.1") self.port = self.config.get("port", 0) - self.log.info(f"Connecting to {self.connection_string(mask_password=True)}") - - self.engine = create_async_engine(self.connection_string()) - # Create a session factory bound to the engine - self.async_session = sessionmaker(self.engine, expire_on_commit=False, class_=AsyncSession) await self.init_database() return True @@ -65,12 +59,19 @@ async def handle_event(self, event): await session.commit() + async def create_database(self): + pass + async def init_database(self): + await self.create_database() + + # Now create the engine for the actual database + self.engine = create_async_engine(self.connection_string()) + # Create a session factory bound to the engine + self.async_session = sessionmaker(self.engine, expire_on_commit=False, class_=AsyncSession) + + # Use the engine directly to create all tables async with self.engine.begin() as conn: - # Check if the database exists using the connection's engine URL - if not await conn.run_sync(lambda sync_conn: database_exists(sync_conn.engine.url)): - await conn.run_sync(lambda sync_conn: create_database(sync_conn.engine.url)) - # Create all tables await conn.run_sync(SQLModel.metadata.create_all) def connection_string(self, mask_password=False): diff --git a/bbot/test/test_step_2/module_tests/base.py b/bbot/test/test_step_2/module_tests/base.py index bb63b57e5..47038e9ae 100644 --- a/bbot/test/test_step_2/module_tests/base.py +++ b/bbot/test/test_step_2/module_tests/base.py @@ -20,6 +20,8 @@ class ModuleTestBase: config_overrides = {} modules_overrides = None log = logging.getLogger("bbot") + # if True, the test will be skipped (useful for tests that require docker) + skip_distro_tests = False class ModuleTest: def __init__( @@ -90,7 +92,7 @@ async def module_test( self, httpx_mock, bbot_httpserver, 
bbot_httpserver_ssl, monkeypatch, request, caplog, capsys ): # Skip dastardly test if we're in the distro tests (because dastardly uses docker) - if os.getenv("BBOT_DISTRO_TESTS") and self.name == "dastardly": + if os.getenv("BBOT_DISTRO_TESTS") and self.skip_distro_tests: pytest.skip("Skipping module_test for dastardly module due to BBOT_DISTRO_TESTS environment variable") self.log.info(f"Starting {self.name} module test") @@ -112,7 +114,9 @@ async def module_test( @pytest.mark.asyncio async def test_module_run(self, module_test): - self.check(module_test, module_test.events) + from bbot.core.helpers.misc import execute_sync_or_async + + await execute_sync_or_async(self.check, module_test, module_test.events) module_test.log.info(f"Finished {self.name} module test") current_task = asyncio.current_task() tasks = [t for t in asyncio.all_tasks() if t != current_task] diff --git a/bbot/test/test_step_2/module_tests/test_module_dastardly.py b/bbot/test/test_step_2/module_tests/test_module_dastardly.py index cb4a501b8..98fa02453 100644 --- a/bbot/test/test_step_2/module_tests/test_module_dastardly.py +++ b/bbot/test/test_step_2/module_tests/test_module_dastardly.py @@ -7,6 +7,7 @@ class TestDastardly(ModuleTestBase): targets = ["http://127.0.0.1:5556/"] modules_overrides = ["httpx", "dastardly"] + skip_distro_tests = True web_response = """ diff --git a/bbot/test/test_step_2/module_tests/test_module_postgres.py b/bbot/test/test_step_2/module_tests/test_module_postgres.py new file mode 100644 index 000000000..874acdb19 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_postgres.py @@ -0,0 +1,74 @@ +import time +import asyncio + +from .base import ModuleTestBase + + +class TestPostgres(ModuleTestBase): + targets = ["evilcorp.com"] + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + process = await asyncio.create_subprocess_exec( + "docker", + "run", + "--name", + "bbot-test-postgres", + "--rm", + "-e", + 
"POSTGRES_PASSWORD=bbotislife", + "-e", + "POSTGRES_USER=postgres", + "-p", + "5432:5432", + "-d", + "postgres", + ) + + import asyncpg + + # wait for the container to start + start_time = time.time() + while True: + try: + # Connect to the default 'postgres' database to create 'bbot' + conn = await asyncpg.connect( + user="postgres", password="bbotislife", database="postgres", host="127.0.0.1" + ) + await conn.execute("CREATE DATABASE bbot") + await conn.close() + break + except asyncpg.exceptions.DuplicateDatabaseError: + # If the database already exists, break the loop + break + except Exception as e: + if time.time() - start_time > 60: # timeout after 60 seconds + self.log.error("PostgreSQL server did not start in time.") + raise e + await asyncio.sleep(1) + + if process.returncode != 0: + self.log.error(f"Failed to start PostgreSQL server") + + async def check(self, module_test, events): + import asyncpg + + # Connect to the PostgreSQL database + conn = await asyncpg.connect(user="postgres", password="bbotislife", database="bbot", host="127.0.0.1") + + try: + events = await conn.fetch("SELECT * FROM event") + assert len(events) == 3, "No events found in PostgreSQL database" + scans = await conn.fetch("SELECT * FROM scan") + assert len(scans) == 1, "No scans found in PostgreSQL database" + targets = await conn.fetch("SELECT * FROM target") + assert len(targets) == 1, "No targets found in PostgreSQL database" + finally: + await conn.close() + process = await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-postgres", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + stdout, stderr = await process.communicate() + + if process.returncode != 0: + raise Exception(f"Failed to stop PostgreSQL server: {stderr.decode()}") diff --git a/bbot/test/test_step_2/module_tests/test_module_sqlite.py b/bbot/test/test_step_2/module_tests/test_module_sqlite.py index 809d68c47..ec80b7555 100644 --- 
a/bbot/test/test_step_2/module_tests/test_module_sqlite.py +++ b/bbot/test/test_step_2/module_tests/test_module_sqlite.py @@ -10,9 +10,9 @@ def check(self, module_test, events): assert sqlite_output_file.exists(), "SQLite output file not found" with sqlite3.connect(sqlite_output_file) as db: cursor = db.cursor() - cursor.execute("SELECT * FROM event") - assert len(cursor.fetchall()) > 0, "No events found in SQLite database" - cursor.execute("SELECT * FROM scan") - assert len(cursor.fetchall()) > 0, "No scans found in SQLite database" - cursor.execute("SELECT * FROM target") - assert len(cursor.fetchall()) > 0, "No targets found in SQLite database" + results = cursor.execute("SELECT * FROM event").fetchall() + assert len(results) == 3, "No events found in SQLite database" + results = cursor.execute("SELECT * FROM scan").fetchall() + assert len(results) == 1, "No scans found in SQLite database" + results = cursor.execute("SELECT * FROM target").fetchall() + assert len(results) == 1, "No targets found in SQLite database" diff --git a/docs/scanning/output.md b/docs/scanning/output.md index 7efdf4862..e49e10857 100644 --- a/docs/scanning/output.md +++ b/docs/scanning/output.md @@ -187,6 +187,26 @@ The `sqlite` output module produces a SQLite database containing all events, sca bbot -t evilcorp.com -om sqlite -c modules.sqlite.database=/tmp/bbot.sqlite ``` +### Postgres + +The `postgres` output module allows you to ingest events, scans, and targets into a Postgres database. By default, it will connect to the server on `localhost` with a username of `postgres` and password of `bbotislife`. You can change this behavior in the config. 
+ +```bash +# specifying an alternate database +bbot -t evilcorp.com -om postgres -c modules.postgres.database=custom_bbot_db +``` + +```yaml title="postgres_preset.yml" +config: + modules: + postgres: + host: psq.fsociety.local + database: custom_bbot_db + port: 5432 + username: postgres + password: bbotislife +``` + ### Subdomains The `subdomains` output module produces simple text file containing only in-scope and resolved subdomains: