Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Module: Postgres Output #1942

Merged
merged 8 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/distro_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,17 @@ jobs:
if [ "$ID" = "ubuntu" ] || [ "$ID" = "debian" ] || [ "$ID" = "kali" ] || [ "$ID" = "parrotsec" ]; then
export DEBIAN_FRONTEND=noninteractive
apt-get update
apt-get -y install curl git bash build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev
apt-get -y install curl git bash build-essential docker.io libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev wget llvm libncurses5-dev libncursesw5-dev xz-utils tk-dev libffi-dev liblzma-dev
elif [ "$ID" = "alpine" ]; then
apk add --no-cache bash gcc g++ musl-dev libffi-dev curl git make openssl-dev bzip2-dev zlib-dev xz-dev sqlite-dev
apk add --no-cache bash gcc g++ musl-dev libffi-dev docker curl git make openssl-dev bzip2-dev zlib-dev xz-dev sqlite-dev
elif [ "$ID" = "arch" ]; then
pacman -Syu --noconfirm curl git bash base-devel
pacman -Syu --noconfirm curl docker git bash base-devel
elif [ "$ID" = "fedora" ]; then
dnf install -y curl git bash gcc make openssl-devel bzip2-devel libffi-devel zlib-devel xz-devel tk-devel gdbm-devel readline-devel sqlite-devel python3-libdnf5
dnf install -y curl docker git bash gcc make openssl-devel bzip2-devel libffi-devel zlib-devel xz-devel tk-devel gdbm-devel readline-devel sqlite-devel python3-libdnf5
elif [ "$ID" = "gentoo" ]; then
echo "media-libs/libglvnd X" >> /etc/portage/package.use/libglvnd
emerge-webrsync
emerge --update --newuse dev-vcs/git media-libs/mesa curl bash
emerge --update --newuse dev-vcs/git media-libs/mesa curl docker bash
fi
fi
Expand Down
3 changes: 2 additions & 1 deletion bbot/db/sql/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@

import json
import logging
from datetime import datetime
from pydantic import ConfigDict
from typing import List, Optional
from datetime import datetime, timezone
from typing_extensions import Annotated
from pydantic.functional_validators import AfterValidator
from sqlmodel import inspect, Column, Field, SQLModel, JSON, String, DateTime as SQLADateTime
Expand Down Expand Up @@ -114,6 +114,7 @@ def _get_data(data, type):
discovery_context: str = ""
discovery_path: List[str] = Field(default=[], sa_type=JSON)
parent_chain: List[str] = Field(default=[], sa_type=JSON)
inserted_at: NaiveUTC = Field(default_factory=lambda: datetime.now(timezone.utc))


### SCAN ###
Expand Down
49 changes: 49 additions & 0 deletions bbot/modules/output/postgres.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from bbot.modules.templates.sql import SQLTemplate


class Postgres(SQLTemplate):
    """Output module that writes scan data to a PostgreSQL database.

    Supplies the ``postgresql+asyncpg`` protocol and overrides
    ``create_database`` so the configured database is created on the server
    when it does not exist yet.
    """

    watched_events = ["*"]
    meta = {"description": "Output scan data to a Postgres database"}
    options = {
        "username": "postgres",
        "password": "bbotislife",
        "host": "localhost",
        "port": 5432,
        "database": "bbot",
    }
    options_desc = {
        "username": "The username to connect to Postgres",
        "password": "The password to connect to Postgres",
        "host": "The server running Postgres",
        "port": "The port to connect to Postgres",
        "database": "The database name to connect to",
    }
    deps_pip = ["sqlmodel", "asyncpg"]
    protocol = "postgresql+asyncpg"

    async def create_database(self):
        """Create ``self.database`` on the server if it is not already present.

        The existence check goes through a short-lived SQLAlchemy engine that
        connects to the server without naming the target database; the
        ``CREATE DATABASE`` statement is issued over a raw asyncpg connection.
        """
        import asyncpg
        from sqlalchemy import text
        from sqlalchemy.ext.asyncio import create_async_engine

        # Strip the trailing "/<database>" so we connect to the server itself
        # rather than to a database that may not exist yet.
        initial_engine = create_async_engine(self.connection_string().rsplit("/", 1)[0])
        try:
            async with initial_engine.connect() as conn:
                # Bound parameter: the database name comes from user config and
                # must never be spliced into the SQL text.
                result = await conn.execute(
                    text("SELECT 1 FROM pg_database WHERE datname = :name"),
                    {"name": self.database},
                )
                database_exists = result.scalar() is not None
        finally:
            # Release the temporary engine's connection pool.
            await initial_engine.dispose()

        if database_exists:
            return

        # Use asyncpg directly to create the database
        raw_conn = await asyncpg.connect(
            user=self.username,
            password=self.password,
            host=self.host,
            port=self.port,
        )
        try:
            # Identifiers cannot be sent as bind parameters, so quote the name
            # (doubling embedded quotes). Quoting also preserves the exact
            # case, keeping it consistent with the existence check above.
            quoted_name = '"' + str(self.database).replace('"', '""') + '"'
            await raw_conn.execute(f"CREATE DATABASE {quoted_name}")
        finally:
            await raw_conn.close()
2 changes: 1 addition & 1 deletion bbot/modules/output/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class SQLite(SQLTemplate):
options_desc = {
"database": "The path to the sqlite database file",
}
deps_pip = ["sqlmodel", "sqlalchemy-utils", "aiosqlite"]
deps_pip = ["sqlmodel", "aiosqlite"]

async def setup(self):
db_file = self.config.get("database", "")
Expand Down
25 changes: 13 additions & 12 deletions bbot/modules/templates/sql.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from sqlmodel import SQLModel
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession
from sqlalchemy_utils.functions import database_exists, create_database

from bbot.db.sql.models import Event, Scan, Target
from bbot.modules.output.base import BaseOutputModule
Expand All @@ -10,34 +9,29 @@
class SQLTemplate(BaseOutputModule):
meta = {"description": "SQL output module template"}
options = {
"protocol": "",
"database": "bbot",
"username": "",
"password": "",
"host": "127.0.0.1",
"port": 0,
}
options_desc = {
"protocol": "The protocol to use to connect to the database",
"database": "The database to use",
"username": "The username to use to connect to the database",
"password": "The password to use to connect to the database",
"host": "The host to use to connect to the database",
"port": "The port to use to connect to the database",
}

protocol = ""

async def setup(self):
self.database = self.config.get("database", "bbot")
self.username = self.config.get("username", "")
self.password = self.config.get("password", "")
self.host = self.config.get("host", "127.0.0.1")
self.port = self.config.get("port", 0)

self.log.info(f"Connecting to {self.connection_string(mask_password=True)}")

self.engine = create_async_engine(self.connection_string())
# Create a session factory bound to the engine
self.async_session = sessionmaker(self.engine, expire_on_commit=False, class_=AsyncSession)
await self.init_database()
return True

Expand Down Expand Up @@ -65,12 +59,19 @@ async def handle_event(self, event):

await session.commit()

async def create_database(self):
pass

async def init_database(self):
await self.create_database()

# Now create the engine for the actual database
self.engine = create_async_engine(self.connection_string())
# Create a session factory bound to the engine
self.async_session = sessionmaker(self.engine, expire_on_commit=False, class_=AsyncSession)

# Use the engine directly to create all tables
async with self.engine.begin() as conn:
# Check if the database exists using the connection's engine URL
if not await conn.run_sync(lambda sync_conn: database_exists(sync_conn.engine.url)):
await conn.run_sync(lambda sync_conn: create_database(sync_conn.engine.url))
# Create all tables
await conn.run_sync(SQLModel.metadata.create_all)

def connection_string(self, mask_password=False):
Expand Down
8 changes: 6 additions & 2 deletions bbot/test/test_step_2/module_tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class ModuleTestBase:
config_overrides = {}
modules_overrides = None
log = logging.getLogger("bbot")
# if True, the test will be skipped when running the distro tests (useful for tests that require docker)
skip_distro_tests = False

class ModuleTest:
def __init__(
Expand Down Expand Up @@ -90,7 +92,7 @@ async def module_test(
self, httpx_mock, bbot_httpserver, bbot_httpserver_ssl, monkeypatch, request, caplog, capsys
):
# Skip dastardly test if we're in the distro tests (because dastardly uses docker)
if os.getenv("BBOT_DISTRO_TESTS") and self.name == "dastardly":
if os.getenv("BBOT_DISTRO_TESTS") and self.skip_distro_tests:
pytest.skip("Skipping module_test for dastardly module due to BBOT_DISTRO_TESTS environment variable")

self.log.info(f"Starting {self.name} module test")
Expand All @@ -112,7 +114,9 @@ async def module_test(

@pytest.mark.asyncio
async def test_module_run(self, module_test):
self.check(module_test, module_test.events)
from bbot.core.helpers.misc import execute_sync_or_async

await execute_sync_or_async(self.check, module_test, module_test.events)
module_test.log.info(f"Finished {self.name} module test")
current_task = asyncio.current_task()
tasks = [t for t in asyncio.all_tasks() if t != current_task]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
class TestDastardly(ModuleTestBase):
targets = ["http://127.0.0.1:5556/"]
modules_overrides = ["httpx", "dastardly"]
skip_distro_tests = True

web_response = """<!DOCTYPE html>
<html>
Expand Down
74 changes: 74 additions & 0 deletions bbot/test/test_step_2/module_tests/test_module_postgres.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import time
import asyncio

from .base import ModuleTestBase


class TestPostgres(ModuleTestBase):
    """Module test for the postgres output module, run against a dockerized server."""

    targets = ["evilcorp.com"]
    # requires docker, so it is skipped in the distro tests
    skip_distro_tests = True

    async def setup_before_prep(self, module_test):
        """Start a throwaway Postgres container and wait until it accepts connections."""
        import asyncpg

        process = await asyncio.create_subprocess_exec(
            "docker",
            "run",
            "--name",
            "bbot-test-postgres",
            "--rm",
            "-e",
            "POSTGRES_PASSWORD=bbotislife",
            "-e",
            "POSTGRES_USER=postgres",
            "-p",
            "5432:5432",
            "-d",
            "postgres",
        )
        # returncode stays None until the process has been awaited, so wait
        # for `docker run -d` to return before inspecting it.
        if await process.wait() != 0:
            self.log.error("Failed to start PostgreSQL server")

        # wait for the server inside the container to come up
        deadline = time.monotonic() + 60  # timeout after 60 seconds
        while True:
            try:
                # Connect to the default 'postgres' database to create 'bbot'
                conn = await asyncpg.connect(
                    user="postgres", password="bbotislife", database="postgres", host="127.0.0.1"
                )
                try:
                    await conn.execute("CREATE DATABASE bbot")
                finally:
                    # close the connection even when CREATE DATABASE raises
                    await conn.close()
                break
            except asyncpg.exceptions.DuplicateDatabaseError:
                # The database already exists, which is all we need
                break
            except Exception:
                if time.monotonic() > deadline:
                    self.log.error("PostgreSQL server did not start in time.")
                    raise
                await asyncio.sleep(1)

    async def check(self, module_test, events):
        """Verify the expected row counts, then stop the container."""
        import asyncpg

        try:
            # Connect to the PostgreSQL database
            conn = await asyncpg.connect(user="postgres", password="bbotislife", database="bbot", host="127.0.0.1")
            try:
                event_rows = await conn.fetch("SELECT * FROM event")
                assert len(event_rows) == 3, f"Expected 3 events in PostgreSQL database, found {len(event_rows)}"
                scan_rows = await conn.fetch("SELECT * FROM scan")
                assert len(scan_rows) == 1, f"Expected 1 scan in PostgreSQL database, found {len(scan_rows)}"
                target_rows = await conn.fetch("SELECT * FROM target")
                assert len(target_rows) == 1, f"Expected 1 target in PostgreSQL database, found {len(target_rows)}"
            finally:
                await conn.close()
        finally:
            # Stop the container even if connecting or an assertion failed
            await self._stop_postgres_container()

    async def _stop_postgres_container(self):
        """Stop the test container, raising if `docker stop` reports failure."""
        process = await asyncio.create_subprocess_exec(
            "docker", "stop", "bbot-test-postgres", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
        _, stderr = await process.communicate()

        if process.returncode != 0:
            raise Exception(f"Failed to stop PostgreSQL server: {stderr.decode()}")
12 changes: 6 additions & 6 deletions bbot/test/test_step_2/module_tests/test_module_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ def check(self, module_test, events):
assert sqlite_output_file.exists(), "SQLite output file not found"
with sqlite3.connect(sqlite_output_file) as db:
cursor = db.cursor()
cursor.execute("SELECT * FROM event")
assert len(cursor.fetchall()) > 0, "No events found in SQLite database"
cursor.execute("SELECT * FROM scan")
assert len(cursor.fetchall()) > 0, "No scans found in SQLite database"
cursor.execute("SELECT * FROM target")
assert len(cursor.fetchall()) > 0, "No targets found in SQLite database"
results = cursor.execute("SELECT * FROM event").fetchall()
assert len(results) == 3, "No events found in SQLite database"
results = cursor.execute("SELECT * FROM scan").fetchall()
assert len(results) == 1, "No scans found in SQLite database"
results = cursor.execute("SELECT * FROM target").fetchall()
assert len(results) == 1, "No targets found in SQLite database"
20 changes: 20 additions & 0 deletions docs/scanning/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,26 @@ The `sqlite` output module produces a SQLite database containing all events, sca
bbot -t evilcorp.com -om sqlite -c modules.sqlite.database=/tmp/bbot.sqlite
```

### Postgres

The `postgres` output module allows you to ingest events, scans, and targets into a Postgres database. By default, it will connect to the server on `localhost` with a username of `postgres` and password of `bbotislife`. You can change this behavior in the config.

```bash
# specifying an alternate database
bbot -t evilcorp.com -om postgres -c modules.postgres.database=custom_bbot_db
```

```yaml title="postgres_preset.yml"
config:
modules:
postgres:
host: psq.fsociety.local
database: custom_bbot_db
port: 5432
username: postgres
password: bbotislife
```

### Subdomains

The `subdomains` output module produces a simple text file containing only in-scope and resolved subdomains:
Expand Down
Loading