From a8fb52b886bd12c0ae4c6900446a472c83834eda Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:42:49 -0500 Subject: [PATCH 01/26] bump version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8315373da9..ba84c0bb49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bbot" -version = "2.3.0" +version = "3.0.0" description = "OSINT automation for hackers." authors = [ "TheTechromancer", @@ -103,7 +103,7 @@ extend-exclude = "(test_step_1/test_manager_*)" [tool.poetry-dynamic-versioning] enable = true metadata = false -format-jinja = 'v2.3.0{% if branch == "dev" %}.{{ distance }}rc{% endif %}' +format-jinja = 'v3.0.0{% if branch == "dev" %}.{{ distance }}rc{% endif %}' [tool.poetry-dynamic-versioning.substitution] files = ["*/__init__.py"] From 741fdc91882cdb27775ae7e91df2e44879843bcd Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 23:22:52 -0500 Subject: [PATCH 02/26] fix conflict --- bbot/core/event/base.py | 6 +-- bbot/scanner/scanner.py | 3 +- bbot/test/bbot_fixtures.py | 76 ++++++++++++++++++++++++++------------ 3 files changed, 58 insertions(+), 27 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index ce627f6959..5408dadd9f 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -40,6 +40,7 @@ validators, get_file_extension, ) +from bbot.db.helpers import naive_datetime_validator log = logging.getLogger("bbot.core.event") @@ -802,7 +803,7 @@ def json(self, mode="json", siem_friendly=False): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = self.timestamp.isoformat() + j["timestamp"] = naive_datetime_validator(self.timestamp).isoformat() # parent event parent_id = self.parent_id if parent_id: @@ -811,8 +812,7 @@ def json(self, mode="json", siem_friendly=False): if parent_uuid: j["parent_uuid"] = parent_uuid # tags - if self.tags: - j.update({"tags": list(self.tags)}) + j.update({"tags": list(self.tags)}) # parent module if self.module: j.update({"module": str(self.module)}) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 49114a5b5d..62e5c9d3ab 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -500,7 +500,8 @@ async def setup_modules(self, remove_failed=True): self.modules[module.name].set_error_state() hard_failed.append(module.name) else: - self.info(f"Setup soft-failed for {module.name}: {msg}") + log_fn = self.warning if module._type == "output" else self.info + log_fn(f"Setup soft-failed for {module.name}: {msg}") soft_failed.append(module.name) if (not status) and (module._intercept or remove_failed): # if a intercept module fails setup, we always remove it diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index e1e3aa1b8b..4d73d036c1 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -147,48 +147,78 @@ def helpers(scan): @pytest.fixture def events(scan): + + dummy_module = scan._make_dummy_module("dummy_module") + class bbot_events: - localhost = scan.make_event("127.0.0.1", parent=scan.root_event) - ipv4 = scan.make_event("8.8.8.8", parent=scan.root_event) - netv4 = scan.make_event("8.8.8.8/30", parent=scan.root_event) - ipv6 = scan.make_event("2001:4860:4860::8888", parent=scan.root_event) - netv6 = scan.make_event("2001:4860:4860::8888/126", parent=scan.root_event) - domain = scan.make_event("publicAPIs.org", parent=scan.root_event) - subdomain = 
scan.make_event("api.publicAPIs.org", parent=scan.root_event) - email = scan.make_event("bob@evilcorp.co.uk", "EMAIL_ADDRESS", parent=scan.root_event) - open_port = scan.make_event("api.publicAPIs.org:443", parent=scan.root_event) + localhost = scan.make_event("127.0.0.1", parent=scan.root_event, module=dummy_module) + ipv4 = scan.make_event("8.8.8.8", parent=scan.root_event, module=dummy_module) + netv4 = scan.make_event("8.8.8.8/30", parent=scan.root_event, module=dummy_module) + ipv6 = scan.make_event("2001:4860:4860::8888", parent=scan.root_event, module=dummy_module) + netv6 = scan.make_event("2001:4860:4860::8888/126", parent=scan.root_event, module=dummy_module) + domain = scan.make_event("publicAPIs.org", parent=scan.root_event, module=dummy_module) + subdomain = scan.make_event("api.publicAPIs.org", parent=scan.root_event, module=dummy_module) + email = scan.make_event("bob@evilcorp.co.uk", "EMAIL_ADDRESS", parent=scan.root_event, module=dummy_module) + open_port = scan.make_event("api.publicAPIs.org:443", parent=scan.root_event, module=dummy_module) protocol = scan.make_event( - {"host": "api.publicAPIs.org", "port": 443, "protocol": "HTTP"}, "PROTOCOL", parent=scan.root_event + {"host": "api.publicAPIs.org", "port": 443, "protocol": "HTTP"}, + "PROTOCOL", + parent=scan.root_event, + module=dummy_module, + ) + ipv4_open_port = scan.make_event("8.8.8.8:443", parent=scan.root_event, module=dummy_module) + ipv6_open_port = scan.make_event( + "[2001:4860:4860::8888]:443", "OPEN_TCP_PORT", parent=scan.root_event, module=dummy_module + ) + url_unverified = scan.make_event( + "https://api.publicAPIs.org:443/hellofriend", parent=scan.root_event, module=dummy_module + ) + ipv4_url_unverified = scan.make_event( + "https://8.8.8.8:443/hellofriend", parent=scan.root_event, module=dummy_module + ) + ipv6_url_unverified = scan.make_event( + "https://[2001:4860:4860::8888]:443/hellofriend", parent=scan.root_event, module=dummy_module ) - ipv4_open_port = scan.make_event("8.8.8.8:443", parent=scan.root_event) - ipv6_open_port = scan.make_event("[2001:4860:4860::8888]:443", "OPEN_TCP_PORT", parent=scan.root_event) - url_unverified = scan.make_event("https://api.publicAPIs.org:443/hellofriend", parent=scan.root_event) - ipv4_url_unverified = scan.make_event("https://8.8.8.8:443/hellofriend", parent=scan.root_event) - ipv6_url_unverified = scan.make_event("https://[2001:4860:4860::8888]:443/hellofriend", parent=scan.root_event) url = scan.make_event( - "https://api.publicAPIs.org:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://api.publicAPIs.org:443/hellofriend", + "URL", + tags=["status-200"], + parent=scan.root_event, + module=dummy_module, ) ipv4_url = scan.make_event( - "https://8.8.8.8:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://8.8.8.8:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event, module=dummy_module ) ipv6_url = scan.make_event( - "https://[2001:4860:4860::8888]:443/hellofriend", "URL", tags=["status-200"], parent=scan.root_event + "https://[2001:4860:4860::8888]:443/hellofriend", + "URL", + tags=["status-200"], + parent=scan.root_event, + module=dummy_module, + ) + url_hint = scan.make_event( + "https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url, module=dummy_module ) - url_hint = scan.make_event("https://api.publicAPIs.org:443/hello.ash", "URL_HINT", parent=url) vulnerability = scan.make_event( {"host": "evilcorp.com", "severity": "INFO", "description": "asdf"}, "VULNERABILITY", 
parent=scan.root_event, + module=dummy_module, + ) + finding = scan.make_event( + {"host": "evilcorp.com", "description": "asdf"}, "FINDING", parent=scan.root_event, module=dummy_module + ) + vhost = scan.make_event( + {"host": "evilcorp.com", "vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event, module=dummy_module ) - finding = scan.make_event({"host": "evilcorp.com", "description": "asdf"}, "FINDING", parent=scan.root_event) - vhost = scan.make_event({"host": "evilcorp.com", "vhost": "www.evilcorp.com"}, "VHOST", parent=scan.root_event) - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) + http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event, module=dummy_module) storage_bucket = scan.make_event( {"name": "storage", "url": "https://storage.blob.core.windows.net"}, "STORAGE_BUCKET", parent=scan.root_event, + module=dummy_module, ) - emoji = scan.make_event("💩", "WHERE_IS_YOUR_GOD_NOW", parent=scan.root_event) + emoji = scan.make_event("💩", "WHERE_IS_YOUR_GOD_NOW", parent=scan.root_event, module=dummy_module) bbot_events.all = [ # noqa: F841 bbot_events.localhost, From 01f212be0eaf545475a53e88095834d60b257b7f Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 17:25:16 -0500 Subject: [PATCH 03/26] resolve conflict --- bbot/core/event/base.py | 2 +- bbot/models/helpers.py | 16 +++ bbot/models/pydantic.py | 111 ++++++++++++++++++ bbot/{db/sql/models.py => models/sql.py} | 0 bbot/modules/output/mongo.py | 68 +++++++++++ bbot/modules/templates/sql.py | 2 +- bbot/test/test_step_1/test_db_models.py | 29 +++++ .../module_tests/test_module_mongo.py | 81 +++++++++++++ 8 files changed, 307 insertions(+), 2 deletions(-) create mode 100644 bbot/models/helpers.py create mode 100644 bbot/models/pydantic.py rename bbot/{db/sql/models.py => models/sql.py} (100%) create mode 100644 bbot/modules/output/mongo.py create mode 100644 bbot/test/test_step_1/test_db_models.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_mongo.py diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 5408dadd9f..6b1176af65 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -40,7 +40,7 @@ validators, get_file_extension, ) -from bbot.db.helpers import naive_datetime_validator +from bbot.models.helpers import naive_datetime_validator log = logging.getLogger("bbot.core.event") diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py new file mode 100644 index 0000000000..40e127c53b --- /dev/null +++ b/bbot/models/helpers.py @@ -0,0 +1,16 @@ +from datetime import datetime +from typing_extensions import Annotated +from pydantic.functional_validators import AfterValidator + + +def naive_datetime_validator(d: datetime): + """ + Converts all dates into UTC, then drops timezone information. + + This is needed to prevent inconsistencies in sqlite, because it is timezone-naive. 
+ """ + # drop timezone info + return d.replace(tzinfo=None) + + +NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py new file mode 100644 index 0000000000..0d54cc91b7 --- /dev/null +++ b/bbot/models/pydantic.py @@ -0,0 +1,111 @@ +import json +import logging +from datetime import datetime +from typing import Optional, List, Union, Annotated +from pydantic import BaseModel, ConfigDict, field_serializer + +from bbot.models.helpers import NaiveUTC, naive_datetime_validator + +log = logging.getLogger("bbot_server.models") + + +class BBOTBaseModel(BaseModel): + model_config = ConfigDict(extra="ignore") + + def to_json(self, **kwargs): + return json.dumps(self.model_dump(), sort_keys=True, **kwargs) + + def __hash__(self): + return hash(self.to_json()) + + def __eq__(self, other): + return hash(self) == hash(other) + + +### EVENT ### + +class Event(BBOTBaseModel): + uuid: Annotated[str, "indexed", "unique"] + id: Annotated[str, "indexed"] + type: Annotated[str, "indexed"] + scope_description: str + data: Union[dict, str] + host: Annotated[Optional[str], "indexed"] = None + port: Optional[int] = None + netloc: Optional[str] = None + # we store the host in reverse to allow for instant subdomain queries + # this works because indexes are left-anchored, but we need to search starting from the right side + reverse_host: Annotated[Optional[str], "indexed"] = "" + resolved_hosts: Union[List, None] = None + dns_children: Union[dict, None] = None + web_spider_distance: int = 10 + scope_distance: int = 10 + scan: Annotated[str, "indexed"] + timestamp: Annotated[NaiveUTC, "indexed"] + parent: Annotated[str, "indexed"] + parent_uuid: Annotated[str, "indexed"] + tags: List = [] + module: Annotated[Optional[str], "indexed"] = None + module_sequence: Optional[str] = None + discovery_context: str = "" + discovery_path: List[str] = [] + parent_chain: List[str] = [] + + def __init__(self, **data): + super().__init__(**data) + if self.host: + self.reverse_host = self.host[::-1] + + @staticmethod + def _get_data(data, type): + if isinstance(data, dict) and list(data) == [type]: + return data[type] + return data + + @classmethod + def _indexed_fields(cls): + return sorted( + field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata + ) + + @field_serializer("timestamp") + def serialize_timestamp(self, timestamp: datetime, _info): + return naive_datetime_validator(timestamp).isoformat() + + +### SCAN ### + +class Scan(BBOTBaseModel): + id: Annotated[str, "indexed", "unique"] + name: str + status: Annotated[str, "indexed"] + started_at: Annotated[NaiveUTC, "indexed"] + finished_at: Optional[Annotated[NaiveUTC, "indexed"]] = None + duration_seconds: Optional[float] = None + duration: Optional[str] = None + target: dict + preset: dict + + @classmethod + def from_scan(cls, scan): + return cls( + id=scan.id, + name=scan.name, + status=scan.status, + started_at=scan.started_at, + ) + + +### TARGET ### + +class Target(BBOTBaseModel): + name: str = "Default Target" + strict_scope: bool = False + seeds: List = [] + whitelist: List = [] + blacklist: List = [] + hash: Annotated[str, "indexed", "unique"] + scope_hash: Annotated[str, "indexed"] + seed_hash: Annotated[str, "indexed"] + whitelist_hash: Annotated[str, "indexed"] + blacklist_hash: Annotated[str, "indexed"] diff --git a/bbot/db/sql/models.py b/bbot/models/sql.py similarity index 100% rename from bbot/db/sql/models.py rename to bbot/models/sql.py diff 
--git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py new file mode 100644 index 0000000000..dd4efa47ce --- /dev/null +++ b/bbot/modules/output/mongo.py @@ -0,0 +1,68 @@ +from motor.motor_asyncio import AsyncIOMotorClient + +from bbot.models.pydantic import Event +from bbot.modules.output.base import BaseOutputModule + + +class Mongo(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a MongoDB database", + "created_date": "2024-11-17", + "author": "@TheTechromancer", + } + options = { + "uri": "mongodb://localhost:27017", + "database": "bbot", + "collection_prefix": "", + } + options_desc = { + "uri": "The URI of the MongoDB server", + "database": "The name of the database to use", + "collection_prefix": "Prefix each collection with this string", + } + deps_pip = ["motor~=3.6.0"] + + async def setup(self): + self.uri = self.config.get("uri", "mongodb://localhost:27017") + self.db_client = AsyncIOMotorClient(self.uri) + + # Ping the server to confirm a successful connection + try: + await self.db_client.admin.command("ping") + self.verbose("MongoDB connection successful") + except Exception as e: + return False, f"Failed to connect to MongoDB: {e}" + + self.db_name = self.config.get("database", "bbot") + self.db = self.db_client[self.db_name] + self.collection_prefix = self.config.get("collection_prefix", "") + self.events_collection = self.db[f"{self.collection_prefix}events"] + self.scans_collection = self.db[f"{self.collection_prefix}scans"] + self.targets_collection = self.db[f"{self.collection_prefix}targets"] + + # Build an index for each field in reverse_host and host + for field in Event._indexed_fields(): + await self.collection.create_index([(field, 1)]) + self.verbose(f"Index created for field: {field}") + + return True + + async def handle_event(self, event): + event_json = event.json() + event_pydantic = Event(**event_json) + await self.events_collection.insert_one(event_pydantic.model_dump()) + if event.type == "SCAN": + # here we merge the scan with the one sharing its UUID. 
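+            # fields from the newer event take precedence in the dict merge below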
+ existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + if existing_scan: + # Merge logic here, for example, update the existing scan with new data + updated_scan = {**existing_scan, **event_pydantic.model_dump()} + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, updated_scan) + self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + else: + # Insert as a new scan if no existing scan is found + await self.scans_collection.insert_one(event_pydantic.model_dump()) + self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + + diff --git a/bbot/modules/templates/sql.py b/bbot/modules/templates/sql.py index 39b4e6f00e..42f5494555 100644 --- a/bbot/modules/templates/sql.py +++ b/bbot/modules/templates/sql.py @@ -3,7 +3,7 @@ from sqlalchemy.orm import sessionmaker from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession -from bbot.db.sql.models import Event, Scan, Target +from bbot.models.sql import Event, Scan, Target from bbot.modules.output.base import BaseOutputModule diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py new file mode 100644 index 0000000000..4e003f6f57 --- /dev/null +++ b/bbot/test/test_step_1/test_db_models.py @@ -0,0 +1,29 @@ +from bbot.models.pydantic import Event +from ..bbot_fixtures import * # noqa + + +def test_pydantic_models(events): + + test_event = Event(**events.ipv4.json()) + assert sorted(test_event._indexed_fields()) == [ + "host", + "id", + "module", + "parent", + "parent_uuid", + "reverse_host", + "scan", + "timestamp", + "type", + "uuid", + ] + + # events + for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): + e = getattr(events, event) + event_json = e.json() + event_pydantic = Event(**event_json) + assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json + + +# TODO: SQL diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py new file mode 100644 index 0000000000..10a8655e81 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -0,0 +1,81 @@ +from .base import ModuleTestBase + + +class TestMongo(ModuleTestBase): + test_db_name = "bbot_test" + test_collection_name = "events_test" + config_overrides = {"modules": {"mongo": {"database": test_db_name, "collection": test_collection_name}}} + + async def setup_before_module(self): + from motor.motor_asyncio import AsyncIOMotorClient + + # Connect to the MongoDB collection + client = AsyncIOMotorClient("mongodb://localhost:27017") + db = client[self.test_db_name] + collection = db.get_collection(self.test_collection_name) + + # Check that there are no events in the collection + count = await collection.count_documents({}) + assert count == 0, "There are existing events in the database" + + # Close the MongoDB connection + client.close() + + async def check(self, module_test, events): + try: + from bbot.models.pydantic import Event + from motor.motor_asyncio import AsyncIOMotorClient + + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Connect to the MongoDB collection + client = AsyncIOMotorClient("mongodb://localhost:27017") + db = client[self.test_db_name] + collection = db.get_collection(self.test_collection_name) + + # make sure the collection has all the right indexes + cursor = collection.list_indexes() + indexes = await cursor.to_list(length=None) + for field 
in Event._indexed_fields(): + assert any(field in index["key"] for index in indexes), f"Index for {field} not found" + + # Fetch all events from the collection + cursor = collection.find({}) + db_events = await cursor.to_list(length=None) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert main_event is not None, "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + + # Ensure it has the reverse_host attribute + expected_reverse_host = "blacklanternsecurity.com"[::-1] + assert ( + main_event.get("reverse_host") == expected_reverse_host + ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" + + # Compare the sorted lists + assert len(events_json) == len(db_events_pydantic) + # Events don't match exactly because the mongo ones have reverse_host + assert events_json != db_events_pydantic + for db_event in db_events_pydantic: + db_event.pop("reverse_host") + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" + + finally: + # Clean up: Delete all documents in the collection + await collection.delete_many({}) + # Close the MongoDB connection + client.close() From 820ddf5dc3a8798c3dc38182dc6d7cca64592658 Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 18:39:37 -0500 Subject: [PATCH 04/26] more wip mongo --- bbot/models/helpers.py | 6 ++++- bbot/models/pydantic.py | 31 +++++++++++++++++-------- bbot/modules/output/mongo.py | 19 +++++++-------- bbot/test/test_step_1/test_db_models.py | 8 +++++++ 4 files changed, 43 insertions(+), 21 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 40e127c53b..985c845994 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -3,7 +3,7 @@ from pydantic.functional_validators import AfterValidator -def naive_datetime_validator(d: datetime): +def naive_datetime_validator(d: datetime) -> datetime: """ Converts all dates into UTC, then drops timezone information. 
@@ -13,4 +13,8 @@ def naive_datetime_validator(d: datetime): return d.replace(tzinfo=None) +def naive_utc_now() -> datetime: + return naive_datetime_validator(datetime.now()) + + NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 0d54cc91b7..fe179878e7 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -2,9 +2,9 @@ import logging from datetime import datetime from typing import Optional, List, Union, Annotated -from pydantic import BaseModel, ConfigDict, field_serializer +from pydantic import BaseModel, ConfigDict, field_serializer, Field -from bbot.models.helpers import NaiveUTC, naive_datetime_validator +from bbot.models.helpers import NaiveUTC, naive_datetime_validator, naive_utc_now log = logging.getLogger("bbot_server.models") @@ -12,8 +12,18 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def to_json(self, **kwargs): - return json.dumps(self.model_dump(), sort_keys=True, **kwargs) + def to_json(self, preserve_datetime=False): + ret = self.model_dump() + if preserve_datetime: + for key in ret: + val = getattr(self, key, None) + if isinstance(val, datetime): + ret[key] = val + return ret + + def to_json_string(self, preserve_datetime=False, **kwargs): + kwargs['sort_keys'] = True + return json.dumps(self.to_json(preserve_datetime=preserve_datetime), **kwargs) def __hash__(self): return hash(self.to_json()) @@ -21,6 +31,12 @@ def __hash__(self): def __eq__(self, other): return hash(self) == hash(other) + @classmethod + def _indexed_fields(cls): + return sorted( + field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata + ) + ### EVENT ### @@ -42,6 +58,7 @@ class Event(BBOTBaseModel): scope_distance: int = 10 scan: Annotated[str, "indexed"] timestamp: Annotated[NaiveUTC, "indexed"] + inserted_at: Optional[Annotated[NaiveUTC, "indexed"]] = Field(default_factory=naive_utc_now) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -62,12 +79,6 @@ def _get_data(data, type): return data[type] return data - @classmethod - def _indexed_fields(cls): - return sorted( - field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata - ) - @field_serializer("timestamp") def serialize_timestamp(self, timestamp: datetime, _info): return naive_datetime_validator(timestamp).isoformat() diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index dd4efa47ce..bb92d19d8a 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -1,6 +1,6 @@ from motor.motor_asyncio import AsyncIOMotorClient -from bbot.models.pydantic import Event +from bbot.models.pydantic import Event, Scan, Target from bbot.modules.output.base import BaseOutputModule @@ -42,9 +42,11 @@ async def setup(self): self.targets_collection = self.db[f"{self.collection_prefix}targets"] # Build an index for each field in reverse_host and host - for field in Event._indexed_fields(): - await self.collection.create_index([(field, 1)]) - self.verbose(f"Index created for field: {field}") + for field in Event.model_fields: + if "indexed" in field.metadata: + unique = "unique" in field.metadata + await self.collection.create_index([(field, 1)], unique=unique) + self.verbose(f"Index created for field: {field}") return True @@ -52,17 +54,14 @@ async def handle_event(self, event): event_json = event.json() event_pydantic = Event(**event_json) await 
self.events_collection.insert_one(event_pydantic.model_dump()) + if event.type == "SCAN": - # here we merge the scan with the one sharing its UUID. + scan_json = Scan.from_event(event).model_dump() existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) if existing_scan: - # Merge logic here, for example, update the existing scan with new data - updated_scan = {**existing_scan, **event_pydantic.model_dump()} - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, updated_scan) + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") else: # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") - - diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 4e003f6f57..1ba970f0e7 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,3 +1,5 @@ +from datetime import datetime + from bbot.models.pydantic import Event from ..bbot_fixtures import * # noqa @@ -23,6 +25,12 @@ def test_pydantic_models(events): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) + event_pydantic_dict = event_pydantic.to_json() + event_pydantic_dict_datetime = event_pydantic.to_json(preserve_datetime=True) + assert isinstance(event_pydantic_dict["timestamp"], str) + assert isinstance(event_pydantic_dict["inserted_at"], str) + assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) + assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json From 5591129af2c27eac78f555b774d80348105b0f6e Mon Sep 17 00:00:00 2001 From: github-actions Date: Tue, 19 Nov 2024 20:08:24 -0500 Subject: [PATCH 05/26] more mongo wip --- bbot/models/pydantic.py | 66 ++++++++------- bbot/modules/output/mongo.py | 34 ++++---- bbot/test/test_step_1/test_db_models.py | 14 +++- .../module_tests/test_module_mongo.py | 81 +++++++++++++++---- 4 files changed, 133 insertions(+), 62 deletions(-) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index fe179878e7..906801693a 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,10 +1,9 @@ -import json import logging from datetime import datetime -from typing import Optional, List, Union, Annotated -from pydantic import BaseModel, ConfigDict, field_serializer, Field +from pydantic import BaseModel, ConfigDict, Field +from typing import Optional, List, Union, Annotated, get_type_hints -from bbot.models.helpers import NaiveUTC, naive_datetime_validator, naive_utc_now +from bbot.models.helpers import NaiveUTC, naive_utc_now log = logging.getLogger("bbot_server.models") @@ -12,19 +11,14 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def to_json(self, preserve_datetime=False): - ret = self.model_dump() - if preserve_datetime: - for key in ret: - val = getattr(self, key, None) - if isinstance(val, datetime): - ret[key] = val + def model_dump(self, preserve_datetime=False, **kwargs): + ret = super().model_dump(**kwargs) + if not preserve_datetime: + for datetime_field in self._datetime_fields(): + if datetime_field in ret: + ret[datetime_field] = ret[datetime_field].isoformat() return ret - def to_json_string(self, 
preserve_datetime=False, **kwargs): - kwargs['sort_keys'] = True - return json.dumps(self.to_json(preserve_datetime=preserve_datetime), **kwargs) - def __hash__(self): return hash(self.to_json()) @@ -33,13 +27,37 @@ def __eq__(self, other): @classmethod def _indexed_fields(cls): - return sorted( - field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata - ) + return sorted(field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata) + + @classmethod + def _get_type_hints(cls): + """ + Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint + """ + type_hints = get_type_hints(cls) + unwrapped_type_hints = {} + for field_name in cls.model_fields: + type_hint = type_hints[field_name] + while 1: + if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): + type_hint = type_hint.__args__[0] + else: + break + unwrapped_type_hints[field_name] = type_hint + return unwrapped_type_hints + + @classmethod + def _datetime_fields(cls): + datetime_fields = [] + for field_name, type_hint in cls._get_type_hints().items(): + if type_hint == datetime: + datetime_fields.append(field_name) + return sorted(datetime_fields) ### EVENT ### + class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] @@ -73,19 +91,10 @@ def __init__(self, **data): if self.host: self.reverse_host = self.host[::-1] - @staticmethod - def _get_data(data, type): - if isinstance(data, dict) and list(data) == [type]: - return data[type] - return data - - @field_serializer("timestamp") - def serialize_timestamp(self, timestamp: datetime, _info): - return naive_datetime_validator(timestamp).isoformat() - ### SCAN ### + class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] name: str @@ -109,6 +118,7 @@ def from_scan(cls, scan): ### TARGET ### + class Target(BBOTBaseModel): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index bb92d19d8a..bc323d7ad9 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -14,18 +14,24 @@ class Mongo(BaseOutputModule): options = { "uri": "mongodb://localhost:27017", "database": "bbot", + "username": "", + "password": "", "collection_prefix": "", } options_desc = { "uri": "The URI of the MongoDB server", "database": "The name of the database to use", + "username": "The username to use to connect to the database", + "password": "The password to use to connect to the database", "collection_prefix": "Prefix each collection with this string", } deps_pip = ["motor~=3.6.0"] async def setup(self): self.uri = self.config.get("uri", "mongodb://localhost:27017") - self.db_client = AsyncIOMotorClient(self.uri) + self.username = self.config.get("username", "") + self.password = self.config.get("password", "") + self.db_client = AsyncIOMotorClient(self.uri, username=self.username, password=self.password) # Ping the server to confirm a successful connection try: @@ -42,11 +48,11 @@ async def setup(self): self.targets_collection = self.db[f"{self.collection_prefix}targets"] # Build an index for each field in reverse_host and host - for field in Event.model_fields: + for field_name, field in Event.model_fields.items(): if "indexed" in field.metadata: unique = "unique" in field.metadata - await self.collection.create_index([(field, 1)], unique=unique) - self.verbose(f"Index created for field: {field}") + await 
self.events_collection.create_index([(field_name, 1)], unique=unique) + self.verbose(f"Index created for field: {field_name} (unique={unique})") return True @@ -55,13 +61,13 @@ async def handle_event(self, event): event_pydantic = Event(**event_json) await self.events_collection.insert_one(event_pydantic.model_dump()) - if event.type == "SCAN": - scan_json = Scan.from_event(event).model_dump() - existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) - if existing_scan: - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") - else: - # Insert as a new scan if no existing scan is found - await self.scans_collection.insert_one(event_pydantic.model_dump()) - self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + # if event.type == "SCAN": + # scan_json = Scan.from_event(event).model_dump() + # existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + # if existing_scan: + # await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) + # self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + # else: + # # Insert as a new scan if no existing scan is found + # await self.scans_collection.insert_one(event_pydantic.model_dump()) + # self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 1ba970f0e7..5a6fce547c 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -6,6 +6,8 @@ def test_pydantic_models(events): + assert Event._datetime_fields() == ["inserted_at", "timestamp"] + test_event = Event(**events.ipv4.json()) assert sorted(test_event._indexed_fields()) == [ "host", @@ -20,18 +22,22 @@ def test_pydantic_models(events): "uuid", ] - # events + # convert events to pydantic and back, making sure they're exactly the same for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) - event_pydantic_dict = event_pydantic.to_json() - event_pydantic_dict_datetime = event_pydantic.to_json(preserve_datetime=True) + event_pydantic_dict = event_pydantic.model_dump() + event_pydantic_dict_datetime = event_pydantic.model_dump(preserve_datetime=True) + assert isinstance(event_json["timestamp"], str) + assert isinstance(e.timestamp, datetime) + assert isinstance(event_pydantic.timestamp, datetime) + assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], str) assert isinstance(event_pydantic_dict["inserted_at"], str) assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) - assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host"]) == event_json + assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json # TODO: SQL diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 10a8655e81..839e46156e 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -1,21 +1,58 @@ +import time +import asyncio + from .base import ModuleTestBase class TestMongo(ModuleTestBase): test_db_name = "bbot_test" 
- test_collection_name = "events_test" - config_overrides = {"modules": {"mongo": {"database": test_db_name, "collection": test_collection_name}}} + test_collection_prefix = "test_" + config_overrides = { + "modules": { + "mongo": { + "database": test_db_name, + "username": "bbot", + "password": "bbotislife", + "collection_prefix": test_collection_prefix, + } + } + } + + async def setup_before_prep(self, module_test): + + await asyncio.create_subprocess_exec( + "docker", + "run", + "--name", + "bbot-test-mongo", + "--rm", + "-e", + "MONGO_INITDB_ROOT_USERNAME=bbot", + "-e", + "MONGO_INITDB_ROOT_PASSWORD=bbotislife", + "-p", + "27017:27017", + "-d", + "mongo", + ) - async def setup_before_module(self): from motor.motor_asyncio import AsyncIOMotorClient - # Connect to the MongoDB collection - client = AsyncIOMotorClient("mongodb://localhost:27017") - db = client[self.test_db_name] - collection = db.get_collection(self.test_collection_name) + # Connect to the MongoDB collection with retry logic + while True: + try: + client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") + db = client[self.test_db_name] + events_collection = db.get_collection(self.test_collection_prefix + "events") + # Attempt a simple operation to confirm the connection + await events_collection.count_documents({}) + break # Exit the loop if connection is successful + except Exception as e: + print(f"Connection failed: {e}. Retrying in 5 seconds...") + time.sleep(0.5) # Check that there are no events in the collection - count = await collection.count_documents({}) + count = await events_collection.count_documents({}) assert count == 0, "There are existing events in the database" # Close the MongoDB connection @@ -30,20 +67,30 @@ async def check(self, module_test, events): events_json.sort(key=lambda x: x["timestamp"]) # Connect to the MongoDB collection - client = AsyncIOMotorClient("mongodb://localhost:27017") + client = AsyncIOMotorClient("mongodb://localhost:27017", username="bbot", password="bbotislife") db = client[self.test_db_name] - collection = db.get_collection(self.test_collection_name) + events_collection = db.get_collection(self.test_collection_prefix + "events") # make sure the collection has all the right indexes - cursor = collection.list_indexes() + cursor = events_collection.list_indexes() indexes = await cursor.to_list(length=None) for field in Event._indexed_fields(): assert any(field in index["key"] for index in indexes), f"Index for {field} not found" # Fetch all events from the collection - cursor = collection.find({}) + cursor = events_collection.find({}) db_events = await cursor.to_list(length=None) + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + # we currently don't store timestamps as datetime objects because mongodb has lower precision + # assert isinstance(db_event["timestamp"], datetime) + # assert isinstance(db_event["inserted_at"], datetime) + assert isinstance(db_event["timestamp"], str) + assert isinstance(db_event["inserted_at"], str) + # Convert to Pydantic objects and dump them db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] db_events_pydantic.sort(key=lambda x: x["timestamp"]) @@ -65,17 +112,19 @@ async def check(self, module_test, events): main_event.get("reverse_host") == expected_reverse_host ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" - # Compare the sorted lists - assert len(events_json) == 
len(db_events_pydantic) - # Events don't match exactly because the mongo ones have reverse_host + # Events don't match exactly because the mongo ones have reverse_host and inserted_at assert events_json != db_events_pydantic for db_event in db_events_pydantic: db_event.pop("reverse_host") + db_event.pop("inserted_at") # They should match after removing reverse_host assert events_json == db_events_pydantic, "Events do not match" finally: # Clean up: Delete all documents in the collection - await collection.delete_many({}) + await events_collection.delete_many({}) # Close the MongoDB connection client.close() + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-mongo", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From 240fd68c3ec7647c703d05a07bcfeb8eb3ee9a7f Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 20 Nov 2024 11:54:12 -0500 Subject: [PATCH 06/26] skip distro tests --- bbot/test/test_step_2/module_tests/test_module_mongo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 839e46156e..31e7f70747 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -17,6 +17,7 @@ class TestMongo(ModuleTestBase): } } } + skip_distro_tests = True async def setup_before_prep(self, module_test): @@ -48,7 +49,7 @@ async def setup_before_prep(self, module_test): await events_collection.count_documents({}) break # Exit the loop if connection is successful except Exception as e: - print(f"Connection failed: {e}. Retrying in 5 seconds...") + print(f"Connection failed: {e}. Retrying...") time.sleep(0.5) # Check that there are no events in the collection From 764753391b59c543d85e0b96caeefbde1df769b9 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 10:35:12 -0500 Subject: [PATCH 07/26] more wip mongo --- bbot/core/event/base.py | 8 ++++++-- bbot/models/pydantic.py | 10 +++++----- bbot/modules/output/mongo.py | 20 ++++++++++---------- bbot/test/bbot_fixtures.py | 14 +++++++------- bbot/test/test_step_1/test_db_models.py | 9 +++------ bbot/test/test_step_1/test_events.py | 6 +++--- 6 files changed, 34 insertions(+), 33 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 6b1176af65..53e2c62236 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -803,7 +803,7 @@ def json(self, mode="json", siem_friendly=False): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = naive_datetime_validator(self.timestamp).isoformat() + j["timestamp"] = naive_datetime_validator(self.timestamp).timestamp() # parent event parent_id = self.parent_id if parent_id: @@ -1773,7 +1773,11 @@ def event_from_json(j, siem_friendly=False): resolved_hosts = j.get("resolved_hosts", []) event._resolved_hosts = set(resolved_hosts) - event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) + # accept both isoformat and unix timestamp + try: + event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"]) + except Exception: + event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) event.scope_distance = j["scope_distance"] parent_id = j.get("parent", None) if parent_id is not None: diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 906801693a..388d85f05f 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -11,12 +11,12 @@ class 
BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def model_dump(self, preserve_datetime=False, **kwargs): + def model_dump(self, **kwargs): ret = super().model_dump(**kwargs) - if not preserve_datetime: - for datetime_field in self._datetime_fields(): - if datetime_field in ret: - ret[datetime_field] = ret[datetime_field].isoformat() + # convert datetime fields to unix timestamps + for datetime_field in self._datetime_fields(): + if datetime_field in ret: + ret[datetime_field] = ret[datetime_field].timestamp() return ret def __hash__(self): diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index bc323d7ad9..03185b169c 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -61,13 +61,13 @@ async def handle_event(self, event): event_pydantic = Event(**event_json) await self.events_collection.insert_one(event_pydantic.model_dump()) - # if event.type == "SCAN": - # scan_json = Scan.from_event(event).model_dump() - # existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) - # if existing_scan: - # await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - # self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") - # else: - # # Insert as a new scan if no existing scan is found - # await self.scans_collection.insert_one(event_pydantic.model_dump()) - # self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + if event.type == "SCAN": + scan_json = Scan.from_event(event).model_dump() + existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + if existing_scan: + await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) + self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + else: + # Insert as a new scan if no existing scan is found + await self.scans_collection.insert_one(event_pydantic.model_dump()) + self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 4d73d036c1..229c58a290 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -254,12 +254,12 @@ class bbot_events: return bbot_events -@pytest.fixture(scope="session", autouse=True) -def install_all_python_deps(): - deps_pip = set() - for module in DEFAULT_PRESET.module_loader.preloaded().values(): - deps_pip.update(set(module.get("deps", {}).get("pip", []))) +# @pytest.fixture(scope="session", autouse=True) +# def install_all_python_deps(): +# deps_pip = set() +# for module in DEFAULT_PRESET.module_loader.preloaded().values(): +# deps_pip.update(set(module.get("deps", {}).get("pip", []))) - constraint_file = tempwordlist(get_python_constraints()) +# constraint_file = tempwordlist(get_python_constraints()) - subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) +# subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index 5a6fce547c..d29e7e79a8 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -28,15 +28,12 @@ def test_pydantic_models(events): event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() - event_pydantic_dict_datetime = event_pydantic.model_dump(preserve_datetime=True) - assert 
isinstance(event_json["timestamp"], str) + assert isinstance(event_json["timestamp"], float) assert isinstance(e.timestamp, datetime) assert isinstance(event_pydantic.timestamp, datetime) assert not "inserted_at" in event_json - assert isinstance(event_pydantic_dict["timestamp"], str) - assert isinstance(event_pydantic_dict["inserted_at"], str) - assert isinstance(event_pydantic_dict_datetime["timestamp"], datetime) - assert isinstance(event_pydantic_dict_datetime["inserted_at"], datetime) + assert isinstance(event_pydantic_dict["timestamp"], float) + assert isinstance(event_pydantic_dict["inserted_at"], float) assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 8156fc7969..5c6dedad8a 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -494,7 +494,7 @@ async def test_events(events, helpers): assert db_event.parent_chain[0] == str(db_event.uuid) assert db_event.parent.uuid == scan.root_event.uuid assert db_event.parent_uuid == scan.root_event.uuid - timestamp = db_event.timestamp.isoformat() + timestamp = db_event.timestamp.replace(tzinfo=None).timestamp() json_event = db_event.json() assert isinstance(json_event["uuid"], str) assert json_event["uuid"] == str(db_event.uuid) @@ -515,7 +515,7 @@ async def test_events(events, helpers): assert reconstituted_event.uuid == db_event.uuid assert reconstituted_event.parent_uuid == scan.root_event.uuid assert reconstituted_event.scope_distance == 1 - assert reconstituted_event.timestamp.isoformat() == timestamp + assert reconstituted_event.timestamp.timestamp() == timestamp assert reconstituted_event.data == "evilcorp.com:80" assert reconstituted_event.type == "OPEN_TCP_PORT" assert reconstituted_event.host == "evilcorp.com" @@ -538,7 +538,7 @@ async def test_events(events, helpers): assert json_event_siemfriendly["timestamp"] == timestamp reconstituted_event2 = event_from_json(json_event_siemfriendly, siem_friendly=True) assert reconstituted_event2.scope_distance == 1 - assert reconstituted_event2.timestamp.isoformat() == timestamp + assert reconstituted_event2.timestamp.timestamp() == timestamp assert reconstituted_event2.data == "evilcorp.com:80" assert reconstituted_event2.type == "OPEN_TCP_PORT" assert reconstituted_event2.host == "evilcorp.com" From e6da98353681e8b853140e8a17444780d97684dc Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 19:14:15 -0500 Subject: [PATCH 08/26] remove siem_friendly --- bbot/core/event/base.py | 18 +++++++--------- bbot/models/pydantic.py | 14 ++++++++----- bbot/models/sql.py | 21 +++++++------------ bbot/modules/output/http.py | 5 +---- bbot/modules/output/json.py | 6 ++---- bbot/modules/output/mongo.py | 8 +++++++ bbot/test/test_step_1/test_events.py | 21 +++++-------------- .../module_tests/test_module_http.py | 9 -------- .../module_tests/test_module_json.py | 15 ------------- .../module_tests/test_module_mongo.py | 20 +++++++++++++----- docs/scanning/tips_and_tricks.md | 18 ---------------- 11 files changed, 55 insertions(+), 100 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 53e2c62236..29f10190e2 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -756,7 +756,7 @@ def __contains__(self, other): return bool(radixtarget.search(other.host)) return False - def json(self, mode="json", siem_friendly=False): + def json(self, mode="json"): """ Serializes 
the event object to a JSON-compatible dictionary. @@ -765,7 +765,6 @@ def json(self, mode="json", siem_friendly=False): Parameters: mode (str): Specifies the data serialization mode. Default is "json". Other options include "graph", "human", and "id". - siem_friendly (bool): Whether to format the JSON in a way that's friendly to SIEM ingestion by Elastic, Splunk, etc. This ensures the value of "data" is always the same type (a dictionary). Returns: dict: JSON-serializable dictionary representation of the event object. @@ -782,10 +781,12 @@ def json(self, mode="json", siem_friendly=False): data = data_attr else: data = smart_decode(self.data) - if siem_friendly: - j["data"] = {self.type: data} - else: + if isinstance(data, str): j["data"] = data + elif isinstance(data, dict): + j["data_json"] = data + else: + raise ValueError(f"Invalid data type: {type(data)}") # host, dns children if self.host: j["host"] = str(self.host) @@ -1728,7 +1729,7 @@ def make_event( ) -def event_from_json(j, siem_friendly=False): +def event_from_json(j): """ Creates an event object from a JSON dictionary. @@ -1760,10 +1761,7 @@ def event_from_json(j, siem_friendly=False): "context": j.get("discovery_context", None), "dummy": True, } - if siem_friendly: - data = j["data"][event_type] - else: - data = j["data"] + data = j.get("data_json", j.get("data", None)) kwargs["data"] = data event = make_event(**kwargs) event_uuid = j.get("uuid", None) diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 388d85f05f..0591a93515 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -57,13 +57,13 @@ def _datetime_fields(cls): ### EVENT ### - class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] type: Annotated[str, "indexed"] scope_description: str - data: Union[dict, str] + data: Annotated[Optional[str], "indexed"] = None + data_json: Optional[dict] = None host: Annotated[Optional[str], "indexed"] = None port: Optional[int] = None netloc: Optional[str] = None @@ -75,8 +75,8 @@ class Event(BBOTBaseModel): web_spider_distance: int = 10 scope_distance: int = 10 scan: Annotated[str, "indexed"] - timestamp: Annotated[NaiveUTC, "indexed"] - inserted_at: Optional[Annotated[NaiveUTC, "indexed"]] = Field(default_factory=naive_utc_now) + timestamp: Annotated[float, "indexed"] + inserted_at: Annotated[Optional[float], "indexed"] = Field(default_factory=naive_utc_now) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -91,9 +91,13 @@ def __init__(self, **data): if self.host: self.reverse_host = self.host[::-1] + def get_data(self): + if self.data is not None: + return self.data + return self.data_json -### SCAN ### +### SCAN ### class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] diff --git a/bbot/models/sql.py b/bbot/models/sql.py index e937fad1e6..2640e3ca81 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -67,25 +67,19 @@ def __eq__(self, other): ### EVENT ### - class Event(BBOTBaseModel, table=True): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - data = self._get_data(self.data, self.type) - self.data = {self.type: data} + if self.data is None and self.data_json is None: + raise ValueError("data or data_json must be provided") if self.host: self.reverse_host = self.host[::-1] def get_data(self): - return self._get_data(self.data, self.type) - - @staticmethod - def _get_data(data, type): - # handle SIEM-friendly format - if isinstance(data, dict) and list(data) == 
[type]: - return data[type] - return data + if self.data is not None: + return self.data + return self.data_json uuid: str = Field( primary_key=True, @@ -95,7 +89,8 @@ def _get_data(data, type): id: str = Field(index=True) type: str = Field(index=True) scope_description: str - data: dict = Field(sa_type=JSON) + data: Optional[str] = Field(default=None, index=True) + data_json: Optional[dict] = Field(default=None) host: Optional[str] port: Optional[int] netloc: Optional[str] @@ -119,7 +114,6 @@ def _get_data(data, type): ### SCAN ### - class Scan(BBOTBaseModel, table=True): id: str = Field(primary_key=True) name: str @@ -134,7 +128,6 @@ class Scan(BBOTBaseModel, table=True): ### TARGET ### - class Target(BBOTBaseModel, table=True): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py index 9d9241da0b..7d94148d72 100644 --- a/bbot/modules/output/http.py +++ b/bbot/modules/output/http.py @@ -15,7 +15,6 @@ class HTTP(BaseOutputModule): "username": "", "password": "", "timeout": 10, - "siem_friendly": False, } options_desc = { "url": "Web URL", @@ -24,14 +23,12 @@ class HTTP(BaseOutputModule): "username": "Username (basic auth)", "password": "Password (basic auth)", "timeout": "HTTP timeout", - "siem_friendly": "Format JSON in a SIEM-friendly way for ingestion into Elastic, Splunk, etc.", } async def setup(self): self.url = self.config.get("url", "") self.method = self.config.get("method", "POST") self.timeout = self.config.get("timeout", 10) - self.siem_friendly = self.config.get("siem_friendly", False) self.headers = {} bearer = self.config.get("bearer", "") if bearer: @@ -56,7 +53,7 @@ async def handle_event(self, event): method=self.method, auth=self.auth, headers=self.headers, - json=event.json(siem_friendly=self.siem_friendly), + json=event.json(), ) is_success = False if response is None else response.is_success if not is_success: diff --git a/bbot/modules/output/json.py b/bbot/modules/output/json.py index a35fa6aed7..b93d1e4e3f 100644 --- a/bbot/modules/output/json.py +++ b/bbot/modules/output/json.py @@ -11,20 +11,18 @@ class JSON(BaseOutputModule): "created_date": "2022-04-07", "author": "@TheTechromancer", } - options = {"output_file": "", "siem_friendly": False} + options = {"output_file": ""} options_desc = { "output_file": "Output to file", - "siem_friendly": "Output JSON in a SIEM-friendly format for ingestion into Elastic, Splunk, etc.", } _preserve_graph = True async def setup(self): self._prep_output_dir("output.json") - self.siem_friendly = self.config.get("siem_friendly", False) return True async def handle_event(self, event): - event_json = event.json(siem_friendly=self.siem_friendly) + event_json = event.json() event_str = json.dumps(event_json) if self.file is not None: self.file.write(event_str + "\n") diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 03185b169c..5e555ab0ff 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -71,3 +71,11 @@ async def handle_event(self, event): # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + + target_data = scan_json.get("target", {}) + target = Target(**target_data) + existing_target = await self.targets_collection.find_one({"uuid": target.uuid}) + if existing_target: + await self.targets_collection.replace_one({"uuid": target.uuid}, target.model_dump()) + else: + 
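+                # no existing target found; store it as a new document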
await self.targets_collection.insert_one(target.model_dump()) diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 5c6dedad8a..a940dbce06 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -529,28 +529,17 @@ async def test_events(events, helpers): assert hostless_event_json["data"] == "asdf" assert not "host" in hostless_event_json - # SIEM-friendly serialize/deserialize - json_event_siemfriendly = db_event.json(siem_friendly=True) - assert json_event_siemfriendly["scope_distance"] == 1 - assert json_event_siemfriendly["data"] == {"OPEN_TCP_PORT": "evilcorp.com:80"} - assert json_event_siemfriendly["type"] == "OPEN_TCP_PORT" - assert json_event_siemfriendly["host"] == "evilcorp.com" - assert json_event_siemfriendly["timestamp"] == timestamp - reconstituted_event2 = event_from_json(json_event_siemfriendly, siem_friendly=True) - assert reconstituted_event2.scope_distance == 1 - assert reconstituted_event2.timestamp.timestamp() == timestamp - assert reconstituted_event2.data == "evilcorp.com:80" - assert reconstituted_event2.type == "OPEN_TCP_PORT" - assert reconstituted_event2.host == "evilcorp.com" - assert "127.0.0.1" in reconstituted_event2.resolved_hosts - http_response = scan.make_event(httpx_response, "HTTP_RESPONSE", parent=scan.root_event) assert http_response.parent_id == scan.root_event.id assert http_response.data["input"] == "http://example.com:80" json_event = http_response.json(mode="graph") + assert "data" in json_event + assert "data_json" not in json_event assert isinstance(json_event["data"], str) json_event = http_response.json() - assert isinstance(json_event["data"], dict) + assert "data" not in json_event + assert "data_json" in json_event + assert isinstance(json_event["data_json"], dict) assert json_event["type"] == "HTTP_RESPONSE" assert json_event["host"] == "example.com" assert json_event["parent"] == scan.root_event.id diff --git a/bbot/test/test_step_2/module_tests/test_module_http.py b/bbot/test/test_step_2/module_tests/test_module_http.py index 43b7189adf..d634765425 100644 --- a/bbot/test/test_step_2/module_tests/test_module_http.py +++ b/bbot/test/test_step_2/module_tests/test_module_http.py @@ -52,12 +52,3 @@ def check(self, module_test, events): assert self.headers_correct == True assert self.method_correct == True assert self.url_correct == True - - -class TestHTTPSIEMFriendly(TestHTTP): - modules_overrides = ["http"] - config_overrides = {"modules": {"http": dict(TestHTTP.config_overrides["modules"]["http"])}} - config_overrides["modules"]["http"]["siem_friendly"] = True - - def verify_data(self, j): - return j["data"] == {"DNS_NAME": "blacklanternsecurity.com"} and j["type"] == "DNS_NAME" diff --git a/bbot/test/test_step_2/module_tests/test_module_json.py b/bbot/test/test_step_2/module_tests/test_module_json.py index 27ed5a55e0..bf79eeb13f 100644 --- a/bbot/test/test_step_2/module_tests/test_module_json.py +++ b/bbot/test/test_step_2/module_tests/test_module_json.py @@ -53,18 +53,3 @@ def check(self, module_test, events): assert dns_reconstructed.discovery_context == context_data assert dns_reconstructed.discovery_path == [context_data] assert dns_reconstructed.parent_chain == [dns_json["uuid"]] - - -class TestJSONSIEMFriendly(ModuleTestBase): - modules_overrides = ["json"] - config_overrides = {"modules": {"json": {"siem_friendly": True}}} - - def check(self, module_test, events): - txt_file = module_test.scan.home / "output.json" - lines = 
list(module_test.scan.helpers.read_file(txt_file)) - passed = False - for line in lines: - e = json.loads(line) - if e["data"] == {"DNS_NAME": "blacklanternsecurity.com"}: - passed = True - assert passed diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index 31e7f70747..fcfed7841a 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -72,12 +72,16 @@ async def check(self, module_test, events): db = client[self.test_db_name] events_collection = db.get_collection(self.test_collection_prefix + "events") + ### INDEXES ### + # make sure the collection has all the right indexes cursor = events_collection.list_indexes() indexes = await cursor.to_list(length=None) for field in Event._indexed_fields(): assert any(field in index["key"] for index in indexes), f"Index for {field} not found" + ### EVENTS ### + # Fetch all events from the collection cursor = events_collection.find({}) db_events = await cursor.to_list(length=None) @@ -86,11 +90,8 @@ async def check(self, module_test, events): assert len(events_json) == len(db_events) for db_event in db_events: - # we currently don't store timestamps as datetime objects because mongodb has lower precision - # assert isinstance(db_event["timestamp"], datetime) - # assert isinstance(db_event["inserted_at"], datetime) - assert isinstance(db_event["timestamp"], str) - assert isinstance(db_event["inserted_at"], str) + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) # Convert to Pydantic objects and dump them db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] @@ -121,6 +122,15 @@ async def check(self, module_test, events): # They should match after removing reverse_host assert events_json == db_events_pydantic, "Events do not match" + ### SCANS ### + + # Fetch all scans from the collection + cursor = db.get_collection(self.test_collection_prefix + "scans").find({}) + db_scans = await cursor.to_list(length=None) + assert len(db_scans) == 1, "There should be exactly one scan" + db_scan = db_scans[0] + assert db_scan["scan"]["id"] == main_event["scan"], "Scan id should match main event scan" + finally: # Clean up: Delete all documents in the collection await events_collection.delete_many({}) diff --git a/docs/scanning/tips_and_tricks.md b/docs/scanning/tips_and_tricks.md index c5073c1d63..e13d82875e 100644 --- a/docs/scanning/tips_and_tricks.md +++ b/docs/scanning/tips_and_tricks.md @@ -108,24 +108,6 @@ config: bbot -t evilcorp.com -p skip_cdns.yml ``` -### Ingest BBOT Data Into SIEM (Elastic, Splunk) - -If your goal is to run a BBOT scan and later feed its data into a SIEM such as Elastic, be sure to enable this option when scanning: - -```bash -bbot -t evilcorp.com -c modules.json.siem_friendly=true -``` - -This ensures the `.data` event attribute is always the same type (a dictionary), by nesting it like so: -```json -{ - "type": "DNS_NAME", - "data": { - "DNS_NAME": "blacklanternsecurity.com" - } -} -``` - ### Custom HTTP Proxy Web pentesters may appreciate BBOT's ability to quickly populate Burp Suite site maps for all subdomains in a target. If your scan includes gowitness, this will capture the traffic as if you manually visited each website in your browser -- including auxiliary web resources and javascript API calls. 
To accomplish this, set the `web.http_proxy` config option like so: From 72a96eb3c560f5ac8b29a101016310bf2449c328 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:00:28 -0500 Subject: [PATCH 09/26] steady work on mongo, bbot 3.0 --- bbot/core/event/base.py | 7 +- bbot/models/helpers.py | 20 +++--- bbot/models/pydantic.py | 71 +++++++++---------- bbot/models/sql.py | 27 +++---- bbot/modules/output/mongo.py | 16 ++--- bbot/scanner/scanner.py | 12 ++-- bbot/test/bbot_fixtures.py | 14 ++-- bbot/test/test_step_1/test_db_models.py | 25 ++++++- bbot/test/test_step_1/test_events.py | 2 +- .../module_tests/test_module_mongo.py | 12 +++- .../module_tests/test_module_sqlite.py | 14 ++++ 11 files changed, 128 insertions(+), 92 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 29f10190e2..bd6e884b37 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -12,6 +12,7 @@ from copy import copy from pathlib import Path from typing import Optional +from zoneinfo import ZoneInfo from contextlib import suppress from radixtarget import RadixTarget from urllib.parse import urljoin, parse_qs @@ -40,7 +41,7 @@ validators, get_file_extension, ) -from bbot.models.helpers import naive_datetime_validator +from bbot.models.helpers import utc_datetime_validator log = logging.getLogger("bbot.core.event") @@ -804,7 +805,7 @@ def json(self, mode="json"): if self.scan: j["scan"] = self.scan.id # timestamp - j["timestamp"] = naive_datetime_validator(self.timestamp).timestamp() + j["timestamp"] = utc_datetime_validator(self.timestamp).timestamp() # parent event parent_id = self.parent_id if parent_id: @@ -1773,7 +1774,7 @@ def event_from_json(j): # accept both isoformat and unix timestamp try: - event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"]) + event.timestamp = datetime.datetime.fromtimestamp(j["timestamp"], ZoneInfo("UTC")) except Exception: event.timestamp = datetime.datetime.fromisoformat(j["timestamp"]) event.scope_distance = j["scope_distance"] diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 985c845994..c7fc078a45 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,20 +1,22 @@ +from datetime import UTC from datetime import datetime from typing_extensions import Annotated from pydantic.functional_validators import AfterValidator -def naive_datetime_validator(d: datetime) -> datetime: +def utc_datetime_validator(d: datetime) -> datetime: """ - Converts all dates into UTC, then drops timezone information. - - This is needed to prevent inconsistencies in sqlite, because it is timezone-naive. 
+ Converts all dates into UTC """ - # drop timezone info - return d.replace(tzinfo=None) + if d.tzinfo is not None: + return d.astimezone(UTC) + else: + return d.replace(tzinfo=UTC) -def naive_utc_now() -> datetime: - return naive_datetime_validator(datetime.now()) +def utc_now() -> datetime: + return datetime.now(UTC) -NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] +def utc_now_timestamp() -> datetime: + return utc_now().timestamp() diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py index 0591a93515..356ab2e44c 100644 --- a/bbot/models/pydantic.py +++ b/bbot/models/pydantic.py @@ -1,9 +1,8 @@ import logging -from datetime import datetime from pydantic import BaseModel, ConfigDict, Field -from typing import Optional, List, Union, Annotated, get_type_hints +from typing import Optional, List, Union, Annotated -from bbot.models.helpers import NaiveUTC, naive_utc_now +from bbot.models.helpers import utc_now_timestamp log = logging.getLogger("bbot_server.models") @@ -11,14 +10,6 @@ class BBOTBaseModel(BaseModel): model_config = ConfigDict(extra="ignore") - def model_dump(self, **kwargs): - ret = super().model_dump(**kwargs) - # convert datetime fields to unix timestamps - for datetime_field in self._datetime_fields(): - if datetime_field in ret: - ret[datetime_field] = ret[datetime_field].timestamp() - return ret - def __hash__(self): return hash(self.to_json()) @@ -29,34 +20,37 @@ def __eq__(self, other): def _indexed_fields(cls): return sorted(field_name for field_name, field in cls.model_fields.items() if "indexed" in field.metadata) - @classmethod - def _get_type_hints(cls): - """ - Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint - """ - type_hints = get_type_hints(cls) - unwrapped_type_hints = {} - for field_name in cls.model_fields: - type_hint = type_hints[field_name] - while 1: - if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): - type_hint = type_hint.__args__[0] - else: - break - unwrapped_type_hints[field_name] = type_hint - return unwrapped_type_hints - - @classmethod - def _datetime_fields(cls): - datetime_fields = [] - for field_name, type_hint in cls._get_type_hints().items(): - if type_hint == datetime: - datetime_fields.append(field_name) - return sorted(datetime_fields) + # we keep these because they were a lot of work to make and maybe someday they'll be useful again + + # @classmethod + # def _get_type_hints(cls): + # """ + # Drills down past all the Annotated, Optional, and Union layers to get the underlying type hint + # """ + # type_hints = get_type_hints(cls) + # unwrapped_type_hints = {} + # for field_name in cls.model_fields: + # type_hint = type_hints[field_name] + # while 1: + # if getattr(type_hint, "__origin__", None) in (Annotated, Optional, Union): + # type_hint = type_hint.__args__[0] + # else: + # break + # unwrapped_type_hints[field_name] = type_hint + # return unwrapped_type_hints + + # @classmethod + # def _datetime_fields(cls): + # datetime_fields = [] + # for field_name, type_hint in cls._get_type_hints().items(): + # if type_hint == datetime: + # datetime_fields.append(field_name) + # return sorted(datetime_fields) ### EVENT ### + class Event(BBOTBaseModel): uuid: Annotated[str, "indexed", "unique"] id: Annotated[str, "indexed"] @@ -76,7 +70,7 @@ class Event(BBOTBaseModel): scope_distance: int = 10 scan: Annotated[str, "indexed"] timestamp: Annotated[float, "indexed"] - inserted_at: Annotated[Optional[float], "indexed"] = 
Field(default_factory=naive_utc_now) + inserted_at: Annotated[Optional[float], "indexed"] = Field(default_factory=utc_now_timestamp) parent: Annotated[str, "indexed"] parent_uuid: Annotated[str, "indexed"] tags: List = [] @@ -99,12 +93,13 @@ def get_data(self): ### SCAN ### + class Scan(BBOTBaseModel): id: Annotated[str, "indexed", "unique"] name: str status: Annotated[str, "indexed"] - started_at: Annotated[NaiveUTC, "indexed"] - finished_at: Optional[Annotated[NaiveUTC, "indexed"]] = None + started_at: Annotated[float, "indexed"] + finished_at: Annotated[Optional[float], "indexed"] = None duration_seconds: Optional[float] = None duration: Optional[str] = None target: dict diff --git a/bbot/models/sql.py b/bbot/models/sql.py index 2640e3ca81..8e3e059b00 100644 --- a/bbot/models/sql.py +++ b/bbot/models/sql.py @@ -3,13 +3,15 @@ import json import logging +from datetime import datetime from pydantic import ConfigDict from typing import List, Optional -from datetime import datetime, timezone from typing_extensions import Annotated from pydantic.functional_validators import AfterValidator from sqlmodel import inspect, Column, Field, SQLModel, JSON, String, DateTime as SQLADateTime +from bbot.models.helpers import utc_now_timestamp + log = logging.getLogger("bbot_server.models") @@ -27,14 +29,6 @@ def naive_datetime_validator(d: datetime): NaiveUTC = Annotated[datetime, AfterValidator(naive_datetime_validator)] -class CustomJSONEncoder(json.JSONEncoder): - def default(self, obj): - # handle datetime - if isinstance(obj, datetime): - return obj.isoformat() - return super().default(obj) - - class BBOTBaseModel(SQLModel): model_config = ConfigDict(extra="ignore") @@ -52,7 +46,7 @@ def validated(self): return self def to_json(self, **kwargs): - return json.dumps(self.validated.model_dump(), sort_keys=True, cls=CustomJSONEncoder, **kwargs) + return json.dumps(self.validated.model_dump(), sort_keys=True, **kwargs) @classmethod def _pk_column_names(cls): @@ -67,12 +61,11 @@ def __eq__(self, other): ### EVENT ### + class Event(BBOTBaseModel, table=True): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - if self.data is None and self.data_json is None: - raise ValueError("data or data_json must be provided") if self.host: self.reverse_host = self.host[::-1] @@ -88,12 +81,12 @@ def get_data(self): ) id: str = Field(index=True) type: str = Field(index=True) - scope_description: str data: Optional[str] = Field(default=None, index=True) - data_json: Optional[dict] = Field(default=None) + data_json: Optional[dict] = Field(default=None, sa_type=JSON) host: Optional[str] port: Optional[int] netloc: Optional[str] + scope_description: str # store the host in reversed form for efficient lookups by domain reverse_host: Optional[str] = Field(default="", exclude=True, index=True) resolved_hosts: List = Field(default=[], sa_type=JSON) @@ -101,7 +94,8 @@ def get_data(self): web_spider_distance: int = 10 scope_distance: int = Field(default=10, index=True) scan: str = Field(index=True) - timestamp: NaiveUTC = Field(index=True) + timestamp: float = Field(index=True) + inserted_at: float = Field(default_factory=utc_now_timestamp) parent: str = Field(index=True) tags: List = Field(default=[], sa_type=JSON) module: str = Field(index=True) @@ -109,11 +103,11 @@ def get_data(self): discovery_context: str = "" discovery_path: List[str] = Field(default=[], sa_type=JSON) parent_chain: List[str] = Field(default=[], sa_type=JSON) - inserted_at: NaiveUTC = Field(default_factory=lambda: 
datetime.now(timezone.utc)) ### SCAN ### + class Scan(BBOTBaseModel, table=True): id: str = Field(primary_key=True) name: str @@ -128,6 +122,7 @@ class Scan(BBOTBaseModel, table=True): ### TARGET ### + class Target(BBOTBaseModel, table=True): name: str = "Default Target" strict_scope: bool = False diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 5e555ab0ff..6ad16620f6 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -23,7 +23,7 @@ class Mongo(BaseOutputModule): "database": "The name of the database to use", "username": "The username to use to connect to the database", "password": "The password to use to connect to the database", - "collection_prefix": "Prefix each collection with this string", + "collection_prefix": "Prefix the name of each collection with this string", } deps_pip = ["motor~=3.6.0"] @@ -62,20 +62,20 @@ async def handle_event(self, event): await self.events_collection.insert_one(event_pydantic.model_dump()) if event.type == "SCAN": - scan_json = Scan.from_event(event).model_dump() - existing_scan = await self.scans_collection.find_one({"uuid": event_pydantic.uuid}) + scan_json = Scan(**event.data_json).model_dump() + existing_scan = await self.scans_collection.find_one({"id": event_pydantic.id}) if existing_scan: - await self.scans_collection.replace_one({"uuid": event_pydantic.uuid}, scan_json) - self.verbose(f"Updated scan event with UUID: {event_pydantic.uuid}") + await self.scans_collection.replace_one({"id": event_pydantic.id}, scan_json) + self.verbose(f"Updated scan event with ID: {event_pydantic.id}") else: # Insert as a new scan if no existing scan is found await self.scans_collection.insert_one(event_pydantic.model_dump()) - self.verbose(f"Inserted new scan event with UUID: {event_pydantic.uuid}") + self.verbose(f"Inserted new scan event with ID: {event_pydantic.id}") target_data = scan_json.get("target", {}) target = Target(**target_data) - existing_target = await self.targets_collection.find_one({"uuid": target.uuid}) + existing_target = await self.targets_collection.find_one({"hash": target.hash}) if existing_target: - await self.targets_collection.replace_one({"uuid": target.uuid}, target.model_dump()) + await self.targets_collection.replace_one({"hash": target.hash}, target.model_dump()) else: await self.targets_collection.insert_one(target.model_dump()) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 62e5c9d3ab..a5b04bc2c7 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -6,7 +6,7 @@ import regex as re from pathlib import Path from sys import exc_info -from datetime import datetime +from datetime import datetime, UTC from collections import OrderedDict from bbot import __version__ @@ -327,8 +327,8 @@ async def async_start_without_generator(self): async def async_start(self): """ """ - self.start_time = datetime.now() - self.root_event.data["started_at"] = self.start_time.isoformat() + self.start_time = datetime.now(UTC) + self.root_event.data["started_at"] = self.start_time.timestamp() try: await self._prep() @@ -436,7 +436,7 @@ async def _mark_finished(self): else: status = "FINISHED" - self.end_time = datetime.now() + self.end_time = datetime.now(UTC) self.duration = self.end_time - self.start_time self.duration_seconds = self.duration.total_seconds() self.duration_human = self.helpers.human_timedelta(self.duration) @@ -1130,9 +1130,9 @@ def json(self): j["target"] = self.preset.target.json j["preset"] = self.preset.to_dict(redact_secrets=True) if 
self.start_time is not None: - j["started_at"] = self.start_time.isoformat() + j["started_at"] = self.start_time.timestamp() if self.end_time is not None: - j["finished_at"] = self.end_time.isoformat() + j["finished_at"] = self.end_time.timestamp() if self.duration is not None: j["duration_seconds"] = self.duration_seconds if self.duration_human is not None: diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 229c58a290..4d73d036c1 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -254,12 +254,12 @@ class bbot_events: return bbot_events -# @pytest.fixture(scope="session", autouse=True) -# def install_all_python_deps(): -# deps_pip = set() -# for module in DEFAULT_PRESET.module_loader.preloaded().values(): -# deps_pip.update(set(module.get("deps", {}).get("pip", []))) +@pytest.fixture(scope="session", autouse=True) +def install_all_python_deps(): + deps_pip = set() + for module in DEFAULT_PRESET.module_loader.preloaded().values(): + deps_pip.update(set(module.get("deps", {}).get("pip", []))) -# constraint_file = tempwordlist(get_python_constraints()) + constraint_file = tempwordlist(get_python_constraints()) -# subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) + subprocess.run([sys.executable, "-m", "pip", "install", "--constraint", constraint_file] + list(deps_pip)) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index d29e7e79a8..a8088be4f2 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,11 +1,23 @@ -from datetime import datetime +from datetime import datetime, UTC +from zoneinfo import ZoneInfo from bbot.models.pydantic import Event +from bbot.core.event.base import BaseEvent +from bbot.models.helpers import utc_datetime_validator from ..bbot_fixtures import * # noqa def test_pydantic_models(events): + # test datetime helpers + now = datetime.now(ZoneInfo("America/New_York")) + utc_now = utc_datetime_validator(now) + assert now.timestamp() == utc_now.timestamp() + now2 = datetime.fromtimestamp(utc_now.timestamp(), UTC) + assert now2.timestamp() == utc_now.timestamp() + utc_now2 = utc_datetime_validator(now2) + assert utc_now2.timestamp() == utc_now.timestamp() + assert Event._datetime_fields() == ["inserted_at", "timestamp"] test_event = Event(**events.ipv4.json()) @@ -23,18 +35,25 @@ def test_pydantic_models(events): ] # convert events to pydantic and back, making sure they're exactly the same - for event in ("http_response", "finding", "vulnerability", "ipv4", "storage_bucket"): + for event in ("ipv4", "http_response", "finding", "vulnerability", "storage_bucket"): e = getattr(events, event) event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() + event_reconstituted = BaseEvent.from_json(event_pydantic_dict) assert isinstance(event_json["timestamp"], float) assert isinstance(e.timestamp, datetime) assert isinstance(event_pydantic.timestamp, datetime) assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], float) assert isinstance(event_pydantic_dict["inserted_at"], float) - assert event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) == event_json + + event_pydantic_dict = event_pydantic.model_dump(exclude_none=True, exclude=["reverse_host", "inserted_at"]) + assert event_pydantic_dict == event_json + event_pydantic_dict.pop("scan") + 
event_pydantic_dict.pop("module") + event_pydantic_dict.pop("module_sequence") + assert event_reconstituted.json() == event_pydantic_dict # TODO: SQL diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index a940dbce06..faadbdaae9 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -494,7 +494,7 @@ async def test_events(events, helpers): assert db_event.parent_chain[0] == str(db_event.uuid) assert db_event.parent.uuid == scan.root_event.uuid assert db_event.parent_uuid == scan.root_event.uuid - timestamp = db_event.timestamp.replace(tzinfo=None).timestamp() + timestamp = db_event.timestamp.timestamp() json_event = db_event.json() assert isinstance(json_event["uuid"], str) assert json_event["uuid"] == str(db_event.uuid) diff --git a/bbot/test/test_step_2/module_tests/test_module_mongo.py b/bbot/test/test_step_2/module_tests/test_module_mongo.py index fcfed7841a..ac28e64e7b 100644 --- a/bbot/test/test_step_2/module_tests/test_module_mongo.py +++ b/bbot/test/test_step_2/module_tests/test_module_mongo.py @@ -129,7 +129,17 @@ async def check(self, module_test, events): db_scans = await cursor.to_list(length=None) assert len(db_scans) == 1, "There should be exactly one scan" db_scan = db_scans[0] - assert db_scan["scan"]["id"] == main_event["scan"], "Scan id should match main event scan" + assert db_scan["id"] == main_event["scan"], "Scan id should match main event scan" + + ### TARGETS ### + + # Fetch all targets from the collection + cursor = db.get_collection(self.test_collection_prefix + "targets").find({}) + db_targets = await cursor.to_list(length=None) + assert len(db_targets) == 1, "There should be exactly one target" + db_target = db_targets[0] + scan_event = next(e for e in events if e.type == "SCAN") + assert db_target["hash"] == scan_event.data["target"]["hash"], "Target hash should match scan target hash" finally: # Clean up: Delete all documents in the collection diff --git a/bbot/test/test_step_2/module_tests/test_module_sqlite.py b/bbot/test/test_step_2/module_tests/test_module_sqlite.py index ec80b7555d..7970627b15 100644 --- a/bbot/test/test_step_2/module_tests/test_module_sqlite.py +++ b/bbot/test/test_step_2/module_tests/test_module_sqlite.py @@ -8,6 +8,8 @@ class TestSQLite(ModuleTestBase): def check(self, module_test, events): sqlite_output_file = module_test.scan.home / "output.sqlite" assert sqlite_output_file.exists(), "SQLite output file not found" + + # first connect with raw sqlite with sqlite3.connect(sqlite_output_file) as db: cursor = db.cursor() results = cursor.execute("SELECT * FROM event").fetchall() @@ -16,3 +18,15 @@ def check(self, module_test, events): assert len(results) == 1, "No scans found in SQLite database" results = cursor.execute("SELECT * FROM target").fetchall() assert len(results) == 1, "No targets found in SQLite database" + + # then connect with bbot models + from bbot.models.sql import Event + from sqlmodel import create_engine, Session, select + + engine = create_engine(f"sqlite:///{sqlite_output_file}") + + with Session(engine) as session: + statement = select(Event).where(Event.host == "evilcorp.com") + event = session.exec(statement).first() + assert event.host == "evilcorp.com", "Event host should match target host" + assert event.data == "evilcorp.com", "Event data should match target host" From 67e00dce7964387cc6522ff19d1336343a5af31b Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:02:48 -0500 Subject: [PATCH 10/26] flaked --- 
bbot/models/helpers.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index c7fc078a45..47959ad4ac 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,7 +1,5 @@ from datetime import UTC from datetime import datetime -from typing_extensions import Annotated -from pydantic.functional_validators import AfterValidator def utc_datetime_validator(d: datetime) -> datetime: From 6a727d57c0d7922e311071ccd6bda5ff9a62230d Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 20:41:45 -0500 Subject: [PATCH 11/26] fix tests --- bbot/core/event/base.py | 7 ++++++- bbot/test/test_step_1/test_bbot_fastapi.py | 4 ++-- bbot/test/test_step_1/test_db_models.py | 8 ++++---- bbot/test/test_step_1/test_scan.py | 2 +- bbot/test/test_step_2/module_tests/test_module_json.py | 8 ++++---- bbot/test/test_step_2/module_tests/test_module_splunk.py | 2 +- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index bd6e884b37..d4a37b8f24 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1762,7 +1762,12 @@ def event_from_json(j): "context": j.get("discovery_context", None), "dummy": True, } - data = j.get("data_json", j.get("data", None)) + data = j.get("data_json", None) + if data is None: + data = j.get("data", None) + if data is None: + json_pretty = json.dumps(j, indent=2) + raise ValueError(f"data or data_json must be provided. JSON: {json_pretty}") kwargs["data"] = data event = make_event(**kwargs) event_uuid = j.get("uuid", None) diff --git a/bbot/test/test_step_1/test_bbot_fastapi.py b/bbot/test/test_step_1/test_bbot_fastapi.py index bad4020712..617f95abbf 100644 --- a/bbot/test/test_step_1/test_bbot_fastapi.py +++ b/bbot/test/test_step_1/test_bbot_fastapi.py @@ -28,7 +28,7 @@ def test_bbot_multiprocess(bbot_httpserver): assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert any([e["data"] == "test@blacklanternsecurity.com" for e in events]) + assert any([e.get("data", "") == "test@blacklanternsecurity.com" for e in events]) def test_bbot_fastapi(bbot_httpserver): @@ -61,7 +61,7 @@ def test_bbot_fastapi(bbot_httpserver): assert len(events) >= 3 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert any([e["data"] == "test@blacklanternsecurity.com" for e in events]) + assert any([e.get("data", "") == "test@blacklanternsecurity.com" for e in events]) finally: with suppress(Exception): diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index a8088be4f2..c29cc09a4f 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -18,12 +18,12 @@ def test_pydantic_models(events): utc_now2 = utc_datetime_validator(now2) assert utc_now2.timestamp() == utc_now.timestamp() - assert Event._datetime_fields() == ["inserted_at", "timestamp"] - test_event = Event(**events.ipv4.json()) assert sorted(test_event._indexed_fields()) == [ + "data", "host", "id", + "inserted_at", "module", "parent", "parent_uuid", @@ -40,10 +40,10 @@ def test_pydantic_models(events): event_json = e.json() event_pydantic = Event(**event_json) event_pydantic_dict = event_pydantic.model_dump() - event_reconstituted = BaseEvent.from_json(event_pydantic_dict) + event_reconstituted = BaseEvent.from_json(event_pydantic.model_dump(exclude_none=True)) assert isinstance(event_json["timestamp"], float) assert 
isinstance(e.timestamp, datetime) - assert isinstance(event_pydantic.timestamp, datetime) + assert isinstance(event_pydantic.timestamp, float) assert not "inserted_at" in event_json assert isinstance(event_pydantic_dict["timestamp"], float) assert isinstance(event_pydantic_dict["inserted_at"], float) diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index f5f8458262..5514571fa8 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -144,7 +144,7 @@ async def test_python_output_matches_json(bbot_scanner): assert len(events) == 5 scan_events = [e for e in events if e["type"] == "SCAN"] assert len(scan_events) == 2 - assert all([isinstance(e["data"]["status"], str) for e in scan_events]) + assert all([isinstance(e["data_json"]["status"], str) for e in scan_events]) assert len([e for e in events if e["type"] == "DNS_NAME"]) == 1 assert len([e for e in events if e["type"] == "ORG_STUB"]) == 1 assert len([e for e in events if e["type"] == "IP_ADDRESS"]) == 1 diff --git a/bbot/test/test_step_2/module_tests/test_module_json.py b/bbot/test/test_step_2/module_tests/test_module_json.py index bf79eeb13f..3641574213 100644 --- a/bbot/test/test_step_2/module_tests/test_module_json.py +++ b/bbot/test/test_step_2/module_tests/test_module_json.py @@ -23,13 +23,13 @@ def check(self, module_test, events): assert len(dns_json) == 1 dns_json = dns_json[0] scan = scan_json[0] - assert scan["data"]["name"] == module_test.scan.name - assert scan["data"]["id"] == module_test.scan.id + assert scan["data_json"]["name"] == module_test.scan.name + assert scan["data_json"]["id"] == module_test.scan.id assert scan["id"] == module_test.scan.id assert scan["uuid"] == str(module_test.scan.root_event.uuid) assert scan["parent_uuid"] == str(module_test.scan.root_event.uuid) - assert scan["data"]["target"]["seeds"] == ["blacklanternsecurity.com"] - assert scan["data"]["target"]["whitelist"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["seeds"] == ["blacklanternsecurity.com"] + assert scan["data_json"]["target"]["whitelist"] == ["blacklanternsecurity.com"] assert dns_json["data"] == dns_data assert dns_json["id"] == str(dns_event.id) assert dns_json["uuid"] == str(dns_event.uuid) diff --git a/bbot/test/test_step_2/module_tests/test_module_splunk.py b/bbot/test/test_step_2/module_tests/test_module_splunk.py index d55ed17c27..eef148944c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_splunk.py +++ b/bbot/test/test_step_2/module_tests/test_module_splunk.py @@ -23,7 +23,7 @@ def verify_data(self, j): if not j["index"] == "bbot_index": return False data = j["event"] - if not data["data"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": + if not data["data_json"] == "blacklanternsecurity.com" and data["type"] == "DNS_NAME": return False return True From 87a1517e084348bb2b8c8ee1fdb1e763e4bef9ae Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 21 Nov 2024 21:19:07 -0500 Subject: [PATCH 12/26] fix utc bug --- bbot/scanner/scanner.py | 7 ++++--- bbot/test/test_step_1/test_db_models.py | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index a5b04bc2c7..0915c4cb91 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -6,7 +6,8 @@ import regex as re from pathlib import Path from sys import exc_info -from datetime import datetime, UTC +from datetime import datetime +from zoneinfo import ZoneInfo from collections import 
OrderedDict from bbot import __version__ @@ -327,7 +328,7 @@ async def async_start_without_generator(self): async def async_start(self): """ """ - self.start_time = datetime.now(UTC) + self.start_time = datetime.now(ZoneInfo("UTC")) self.root_event.data["started_at"] = self.start_time.timestamp() try: await self._prep() @@ -436,7 +437,7 @@ async def _mark_finished(self): else: status = "FINISHED" - self.end_time = datetime.now(UTC) + self.end_time = datetime.now(ZoneInfo("UTC")) self.duration = self.end_time - self.start_time self.duration_seconds = self.duration.total_seconds() self.duration_human = self.helpers.human_timedelta(self.duration) diff --git a/bbot/test/test_step_1/test_db_models.py b/bbot/test/test_step_1/test_db_models.py index c29cc09a4f..9c71390696 100644 --- a/bbot/test/test_step_1/test_db_models.py +++ b/bbot/test/test_step_1/test_db_models.py @@ -1,4 +1,4 @@ -from datetime import datetime, UTC +from datetime import datetime from zoneinfo import ZoneInfo from bbot.models.pydantic import Event @@ -13,7 +13,7 @@ def test_pydantic_models(events): now = datetime.now(ZoneInfo("America/New_York")) utc_now = utc_datetime_validator(now) assert now.timestamp() == utc_now.timestamp() - now2 = datetime.fromtimestamp(utc_now.timestamp(), UTC) + now2 = datetime.fromtimestamp(utc_now.timestamp(), ZoneInfo("UTC")) assert now2.timestamp() == utc_now.timestamp() utc_now2 = utc_datetime_validator(now2) assert utc_now2.timestamp() == utc_now.timestamp() From 06dbe0bac478e838b49e43c318491df4fd289294 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 01:32:32 -0500 Subject: [PATCH 13/26] fix tests --- bbot/models/helpers.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bbot/models/helpers.py b/bbot/models/helpers.py index 47959ad4ac..b94bc976cc 100644 --- a/bbot/models/helpers.py +++ b/bbot/models/helpers.py @@ -1,5 +1,5 @@ -from datetime import UTC from datetime import datetime +from zoneinfo import ZoneInfo def utc_datetime_validator(d: datetime) -> datetime: @@ -7,13 +7,13 @@ def utc_datetime_validator(d: datetime) -> datetime: Converts all dates into UTC """ if d.tzinfo is not None: - return d.astimezone(UTC) + return d.astimezone(ZoneInfo("UTC")) else: - return d.replace(tzinfo=UTC) + return d.replace(tzinfo=ZoneInfo("UTC")) def utc_now() -> datetime: - return datetime.now(UTC) + return datetime.now(ZoneInfo("UTC")) def utc_now_timestamp() -> datetime: From 4e29e0d209814217c14f6ffb6004f2038d653d7f Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 01:30:39 -0500 Subject: [PATCH 14/26] elastic module --- bbot/modules/output/elastic.py | 22 +++ bbot/modules/output/http.py | 6 +- .../module_tests/test_module_elastic.py | 130 ++++++++++++++++++ docs/scanning/output.md | 25 ++-- 4 files changed, 171 insertions(+), 12 deletions(-) create mode 100644 bbot/modules/output/elastic.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_elastic.py diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py new file mode 100644 index 0000000000..15bc023df8 --- /dev/null +++ b/bbot/modules/output/elastic.py @@ -0,0 +1,22 @@ +from .http import HTTP + + +class Elastic(HTTP): + watched_events = ["*"] + metadata = { + "description": "Send scan results to Elasticsearch", + "created_date": "2022-11-21", + "author": "@TheTechromancer", + } + options = { + "url": "", + "username": "elastic", + "password": "changeme", + "timeout": 10, + } + options_desc = { + "url": "Elastic URL (e.g. 
https://localhost:9200/<your_index>/_doc)",
+    "username": "Elastic username",
+    "password": "Elastic password",
+    "timeout": "HTTP timeout",
+}
diff --git a/bbot/modules/output/http.py b/bbot/modules/output/http.py
index 7d94148d72..0af65a87d2 100644
--- a/bbot/modules/output/http.py
+++ b/bbot/modules/output/http.py
@@ -1,3 +1,4 @@
+from bbot.models.pydantic import Event
 from bbot.modules.output.base import BaseOutputModule
 
 
@@ -48,12 +49,15 @@ async def setup(self):
 
     async def handle_event(self, event):
         while 1:
+            event_json = event.json()
+            event_pydantic = Event(**event_json)
+            event_json = event_pydantic.model_dump(exclude_none=True)
             response = await self.helpers.request(
                 url=self.url,
                 method=self.method,
                 auth=self.auth,
                 headers=self.headers,
-                json=event.json(),
+                json=event_json,
             )
             is_success = False if response is None else response.is_success
             if not is_success:
diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py
new file mode 100644
index 0000000000..710c22e0f0
--- /dev/null
+++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py
@@ -0,0 +1,130 @@
+import time
+import httpx
+import asyncio
+
+from .base import ModuleTestBase
+
+
+class TestElastic(ModuleTestBase):
+    config_overrides = {
+        "modules": {
+            "elastic": {
+                "url": "https://localhost:9200/bbot_test_events/_doc",
+                "username": "elastic",
+                "password": "bbotislife",
+            }
+        }
+    }
+    skip_distro_tests = True
+
+    async def setup_before_prep(self, module_test):
+        # Start Elasticsearch container
+        await asyncio.create_subprocess_exec(
+            "docker",
+            "run",
+            "--name",
+            "bbot-test-elastic",
+            "--rm",
+            "-e",
+            "ELASTIC_PASSWORD=bbotislife",
+            "-e",
+            "cluster.routing.allocation.disk.watermark.low=96%",
+            "-e",
+            "cluster.routing.allocation.disk.watermark.high=97%",
+            "-e",
+            "cluster.routing.allocation.disk.watermark.flood_stage=98%",
+            "-p",
+            "9200:9200",
+            "-d",
+            "docker.elastic.co/elasticsearch/elasticsearch:8.16.0",
+        )
+
+        # Connect to Elasticsearch with retry logic
+        async with httpx.AsyncClient(verify=False) as client:
+            while True:
+                try:
+                    # Attempt a simple operation to confirm the connection
+                    response = await client.get("https://localhost:9200/_cat/health", auth=("elastic", "bbotislife"))
+                    response.raise_for_status()
+                    break
+                except Exception as e:
+                    print(f"Connection failed: {e}. Retrying...", flush=True)
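+                    # brief backoff before the next health-check attempt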
Retrying...", flush=True) + time.sleep(0.5) + + # Ensure the index is empty + await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) + print("Elasticsearch index cleaned up", flush=True) + + async def check(self, module_test, events): + try: + from bbot.models.pydantic import Event + + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Connect to Elasticsearch + async with httpx.AsyncClient(verify=False) as client: + + # refresh the index + await client.post(f"https://localhost:9200/bbot_test_events/_refresh", auth=("elastic", "bbotislife")) + + # Fetch all events from the index + response = await client.get( + f"https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") + ) + response_json = response.json() + import json + + print(f"response: {json.dumps(response_json, indent=2)}") + db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] + + # make sure we have the same number of events + assert len(events_json) == len(db_events) + + for db_event in db_events: + assert isinstance(db_event["timestamp"], float) + assert isinstance(db_event["inserted_at"], float) + + # Convert to Pydantic objects and dump them + db_events_pydantic = [Event(**e).model_dump(exclude_none=True) for e in db_events] + db_events_pydantic.sort(key=lambda x: x["timestamp"]) + + # Find the main event with type DNS_NAME and data blacklanternsecurity.com + main_event = next( + ( + e + for e in db_events_pydantic + if e.get("type") == "DNS_NAME" and e.get("data") == "blacklanternsecurity.com" + ), + None, + ) + assert ( + main_event is not None + ), "Main event with type DNS_NAME and data blacklanternsecurity.com not found" + + # Ensure it has the reverse_host attribute + expected_reverse_host = "blacklanternsecurity.com"[::-1] + assert ( + main_event.get("reverse_host") == expected_reverse_host + ), f"reverse_host attribute is not correct, expected {expected_reverse_host}" + + # Events don't match exactly because the elastic ones have reverse_host and inserted_at + assert events_json != db_events_pydantic + for db_event in db_events_pydantic: + db_event.pop("reverse_host") + db_event.pop("inserted_at") + # They should match after removing reverse_host + assert events_json == db_events_pydantic, "Events do not match" + + finally: + # Clean up: Delete all documents in the index + async with httpx.AsyncClient(verify=False) as client: + response = await client.delete( + f"https://localhost:9200/bbot_test_events", + auth=("elastic", "bbotislife"), + params={"ignore": "400,404"}, + ) + print(f"Deleted documents from index", flush=True) + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) diff --git a/docs/scanning/output.md b/docs/scanning/output.md index dd45a5c833..16cfbd3593 100644 --- a/docs/scanning/output.md +++ b/docs/scanning/output.md @@ -155,15 +155,20 @@ config: ### Elasticsearch -When outputting to Elastic, use the `http` output module with the following settings (replace `` with your desired index, e.g. 
 
 ### Splunk

From 6e18da7a2fa4d1e7e71cdcd35f7da3927b59a150 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 22 Nov 2024 18:30:05 -0500
Subject: [PATCH 15/26] new module: kafka

---
 bbot/core/event/base.py                       |   2 +-
 bbot/modules/output/elastic.py                |  14 ++-
 bbot/modules/output/kafka.py                  |  42 +++++++
 bbot/scanner/scanner.py                       |   6 +-
 .../module_tests/test_module_elastic.py       |   9 +-
 .../module_tests/test_module_kafka.py         | 108 ++++++++++++++++++
 6 files changed, 167 insertions(+), 14 deletions(-)
 create mode 100644 bbot/modules/output/kafka.py
 create mode 100644 bbot/test/test_step_2/module_tests/test_module_kafka.py

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index d4a37b8f24..066e7469fb 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -814,7 +814,7 @@ def json(self, mode="json"):
         if parent_uuid:
             j["parent_uuid"] = parent_uuid
         # tags
-        j.update({"tags": list(self.tags)})
+        j.update({"tags": sorted(self.tags)})
         # parent module
         if self.module:
             j.update({"module": str(self.module)})
diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py
index 15bc023df8..42c331c516 100644
--- a/bbot/modules/output/elastic.py
+++ b/bbot/modules/output/elastic.py
@@ -2,6 +2,10 @@
 
 
 class Elastic(HTTP):
+    """
+    docker run -d -p 9200:9200 --name=bbot-elastic -v "$(pwd)/elastic_data:/usr/share/elasticsearch/data" -e ELASTIC_PASSWORD=bbotislife -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.16.0
+    """
+
     watched_events = ["*"]
     metadata = {
         "description": "Send scan results to Elasticsearch",
@@ -9,9 +13,9 @@ class Elastic(HTTP):
         "author": "@TheTechromancer",
     }
     options = {
-        "url": "",
+        "url": "https://localhost:9200/bbot_events/_doc",
         "username": "elastic",
-        "password": "changeme",
+        "password": "bbotislife",
         "timeout": 10,
     }
     options_desc = {
@@ -20,3 +24,9 @@ class Elastic(HTTP):
         "password": "Elastic password",
         "timeout": "HTTP timeout",
     }
+
+    async def cleanup(self):
+        # refresh the index
+        doc_regex = self.helpers.re.compile(r"/[^/]+$")
+        refresh_url = doc_regex.sub("/_refresh", self.url)
+        await self.helpers.request(refresh_url, auth=self.auth)
diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py
new file mode 100644
index 0000000000..5b2db13d60
--- /dev/null
+++ b/bbot/modules/output/kafka.py
@@ -0,0 +1,42 @@
+import json
+from aiokafka import AIOKafkaProducer
+
+from bbot.modules.output.base import BaseOutputModule
+
+
+class 
Kafka(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a Kafka topic", + "created_date": "2024-11-17", + "author": "@TheTechromancer", + } + options = { + "bootstrap_servers": "localhost:9092", + "topic": "bbot_events", + } + options_desc = { + "bootstrap_servers": "A comma-separated list of Kafka server addresses", + "topic": "The Kafka topic to publish events to", + } + deps_pip = ["aiokafka~=0.12.0"] + + async def setup(self): + self.bootstrap_servers = self.config.get("bootstrap_servers", "localhost:9092") + self.topic = self.config.get("topic", "bbot_events") + self.producer = AIOKafkaProducer(bootstrap_servers=self.bootstrap_servers) + + # Start the producer + await self.producer.start() + self.verbose("Kafka producer started successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + await self.producer.send_and_wait(self.topic, event_data) + + async def cleanup(self): + # Stop the producer + await self.producer.stop() + self.verbose("Kafka producer stopped successfully") diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py index 0915c4cb91..0db6e1225e 100644 --- a/bbot/scanner/scanner.py +++ b/bbot/scanner/scanner.py @@ -865,15 +865,15 @@ async def _cleanup(self): if not self._cleanedup: self._cleanedup = True self.status = "CLEANING_UP" + # clean up modules + for mod in self.modules.values(): + await mod._cleanup() # clean up dns engine if self.helpers._dns is not None: await self.helpers.dns.shutdown() # clean up web engine if self.helpers._web is not None: await self.helpers.web.shutdown() - # clean up modules - for mod in self.modules.values(): - await mod._cleanup() with contextlib.suppress(Exception): self.home.rmdir() self.helpers.clean_old_scans() diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 710c22e0f0..2f8891a640 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -48,12 +48,11 @@ async def setup_before_prep(self, module_test): response.raise_for_status() break except Exception as e: - print(f"Connection failed: {e}. Retrying...", flush=True) + self.log.verbose(f"Connection failed: {e}. 
Retrying...", flush=True) time.sleep(0.5) # Ensure the index is empty await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) - print("Elasticsearch index cleaned up", flush=True) async def check(self, module_test, events): try: @@ -65,17 +64,11 @@ async def check(self, module_test, events): # Connect to Elasticsearch async with httpx.AsyncClient(verify=False) as client: - # refresh the index - await client.post(f"https://localhost:9200/bbot_test_events/_refresh", auth=("elastic", "bbotislife")) - # Fetch all events from the index response = await client.get( f"https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") ) response_json = response.json() - import json - - print(f"response: {json.dumps(response_json, indent=2)}") db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] # make sure we have the same number of events diff --git a/bbot/test/test_step_2/module_tests/test_module_kafka.py b/bbot/test/test_step_2/module_tests/test_module_kafka.py new file mode 100644 index 0000000000..6a81173561 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_kafka.py @@ -0,0 +1,108 @@ +import json +import asyncio +from contextlib import suppress + +from .base import ModuleTestBase + + +class TestKafka(ModuleTestBase): + config_overrides = { + "modules": { + "kafka": { + "bootstrap_servers": "localhost:9092", + "topic": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start Zookeeper + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-zookeeper", "-p", "2181:2181", "zookeeper:3.9" + ) + + # Wait for Zookeeper to be ready + while True: + try: + # Attempt to connect to Zookeeper with a timeout + reader, writer = await asyncio.wait_for(asyncio.open_connection("localhost", 2181), timeout=0.5) + break # Exit the loop if the connection is successful + except Exception as e: + self.log.verbose(f"Waiting for Zookeeper to be ready: {e}") + await asyncio.sleep(0.5) # Wait a bit before retrying + finally: + with suppress(Exception): + writer.close() + await writer.wait_closed() + + # Start Kafka using wurstmeister/kafka + await asyncio.create_subprocess_exec( + "docker", + "run", + "-d", + "--rm", + "--name", + "bbot-test-kafka", + "--link", + "bbot-test-zookeeper:zookeeper", + "-e", + "KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181", + "-e", + "KAFKA_LISTENERS=PLAINTEXT://0.0.0.0:9092", + "-e", + "KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092", + "-e", + "KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1", + "-p", + "9092:9092", + "wurstmeister/kafka", + ) + + from aiokafka import AIOKafkaConsumer + + # Wait for Kafka to be ready + while True: + try: + self.consumer = AIOKafkaConsumer( + "bbot_events", + bootstrap_servers="localhost:9092", + group_id="test_group", + ) + await self.consumer.start() + break # Exit the loop if the consumer starts successfully + except Exception as e: + self.log.verbose(f"Waiting for Kafka to be ready: {e}") + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + await asyncio.sleep(0.5) # Wait a bit before retrying + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from Kafka + kafka_events = [] + async for msg in self.consumer: + event_data = json.loads(msg.value.decode("utf-8")) + kafka_events.append(event_data) + if 
len(kafka_events) >= len(events_json): + break + + kafka_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == kafka_events, "Events do not match" + + finally: + # Clean up: Stop the Kafka consumer + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + # Stop Kafka and Zookeeper containers + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-kafka", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-zookeeper", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From c7cde580b3dcc920d20d4e214ba463f031a801c2 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 18:56:45 -0500 Subject: [PATCH 16/26] fix elastic tests --- bbot/test/test_step_2/module_tests/test_module_elastic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 2f8891a640..db9f2359f7 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -48,7 +48,7 @@ async def setup_before_prep(self, module_test): response.raise_for_status() break except Exception as e: - self.log.verbose(f"Connection failed: {e}. Retrying...", flush=True) + self.log.verbose(f"Connection failed: {e}. Retrying...") time.sleep(0.5) # Ensure the index is empty @@ -117,7 +117,7 @@ async def check(self, module_test, events): auth=("elastic", "bbotislife"), params={"ignore": "400,404"}, ) - print(f"Deleted documents from index", flush=True) + self.log.verbose(f"Deleted documents from index") await asyncio.create_subprocess_exec( "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) From 1e2d873e27fdaa1a635be04d6dc98c51d06a064a Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 18:45:47 -0500 Subject: [PATCH 17/26] rabbitmq module --- bbot/modules/output/kafka.py | 2 +- bbot/modules/output/rabbitmq.py | 50 ++++++++++++++ .../module_tests/test_module_rabbitmq.py | 69 +++++++++++++++++++ 3 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 bbot/modules/output/rabbitmq.py create mode 100644 bbot/test/test_step_2/module_tests/test_module_rabbitmq.py diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py index 5b2db13d60..9b99710928 100644 --- a/bbot/modules/output/kafka.py +++ b/bbot/modules/output/kafka.py @@ -8,7 +8,7 @@ class Kafka(BaseOutputModule): watched_events = ["*"] meta = { "description": "Output scan data to a Kafka topic", - "created_date": "2024-11-17", + "created_date": "2024-11-22", "author": "@TheTechromancer", } options = { diff --git a/bbot/modules/output/rabbitmq.py b/bbot/modules/output/rabbitmq.py new file mode 100644 index 0000000000..64c094dfbf --- /dev/null +++ b/bbot/modules/output/rabbitmq.py @@ -0,0 +1,50 @@ +import json +import aio_pika + +from bbot.modules.output.base import BaseOutputModule + + +class RabbitMQ(BaseOutputModule): + watched_events = ["*"] + meta = { + "description": "Output scan data to a RabbitMQ queue", + "created_date": "2024-11-22", + "author": "@TheTechromancer", + } + options = { + "url": "amqp://guest:guest@localhost/", + "queue": "bbot_events", + } + options_desc = { + "url": "The RabbitMQ connection URL", + "queue": "The RabbitMQ queue to publish events to", + } + deps_pip = 
["aio_pika~=9.5.0"] + + async def setup(self): + self.rabbitmq_url = self.config.get("url", "amqp://guest:guest@localhost/") + self.queue_name = self.config.get("queue", "bbot_events") + + # Connect to RabbitMQ + self.connection = await aio_pika.connect_robust(self.rabbitmq_url) + self.channel = await self.connection.channel() + + # Declare the queue + self.queue = await self.channel.declare_queue(self.queue_name, durable=True) + self.verbose("RabbitMQ connection and queue setup successfully") + return True + + async def handle_event(self, event): + event_json = event.json() + event_data = json.dumps(event_json).encode("utf-8") + + # Publish the message to the queue + await self.channel.default_exchange.publish( + aio_pika.Message(body=event_data), + routing_key=self.queue_name, + ) + + async def cleanup(self): + # Close the connection + await self.connection.close() + self.verbose("RabbitMQ connection closed successfully") diff --git a/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py b/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py new file mode 100644 index 0000000000..d05808c2da --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_rabbitmq.py @@ -0,0 +1,69 @@ +import json +import asyncio +from contextlib import suppress + +from .base import ModuleTestBase + + +class TestRabbitMQ(ModuleTestBase): + config_overrides = { + "modules": { + "rabbitmq": { + "url": "amqp://guest:guest@localhost/", + "queue": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + import aio_pika + + # Start RabbitMQ + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-rabbitmq", "-p", "5672:5672", "rabbitmq:3-management" + ) + + # Wait for RabbitMQ to be ready + while True: + try: + # Attempt to connect to RabbitMQ with a timeout + connection = await aio_pika.connect_robust("amqp://guest:guest@localhost/") + break # Exit the loop if the connection is successful + except Exception as e: + with suppress(Exception): + await connection.close() + self.log.verbose(f"Waiting for RabbitMQ to be ready: {e}") + await asyncio.sleep(0.5) # Wait a bit before retrying + + self.connection = connection + self.channel = await self.connection.channel() + self.queue = await self.channel.declare_queue("bbot_events", durable=True) + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from RabbitMQ + rabbitmq_events = [] + async with self.queue.iterator() as queue_iter: + async for message in queue_iter: + async with message.process(): + event_data = json.loads(message.body.decode("utf-8")) + rabbitmq_events.append(event_data) + if len(rabbitmq_events) >= len(events_json): + break + + rabbitmq_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == rabbitmq_events, "Events do not match" + + finally: + # Clean up: Close the RabbitMQ connection + await self.connection.close() + # Stop RabbitMQ container + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-rabbitmq", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From 01b2036f24690ccb64457fe7b1172028cfc8291a Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:57:19 -0500 Subject: [PATCH 18/26] fix tests, better error handling in module --- bbot/modules/output/rabbitmq.py | 14 ++++++++++---- bbot/test/test_step_1/test_python_api.py | 2 +- 2 files 
changed, 11 insertions(+), 5 deletions(-)

diff --git a/bbot/modules/output/rabbitmq.py b/bbot/modules/output/rabbitmq.py
index 64c094dfbf..ba4205940d 100644
--- a/bbot/modules/output/rabbitmq.py
+++ b/bbot/modules/output/rabbitmq.py
@@ -39,10 +39,16 @@ async def handle_event(self, event):
         event_data = json.dumps(event_json).encode("utf-8")
 
         # Publish the message to the queue
-        await self.channel.default_exchange.publish(
-            aio_pika.Message(body=event_data),
-            routing_key=self.queue_name,
-        )
+        while 1:
+            try:
+                await self.channel.default_exchange.publish(
+                    aio_pika.Message(body=event_data),
+                    routing_key=self.queue_name,
+                )
+                break
+            except Exception as e:
+                self.error(f"Error publishing message to RabbitMQ: {e}, retrying...")
+                await self.helpers.sleep(1)
 
     async def cleanup(self):
         # Close the connection
diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py
index eaa9636b1c..d67cb45999 100644
--- a/bbot/test/test_step_1/test_python_api.py
+++ b/bbot/test/test_step_1/test_python_api.py
@@ -119,7 +119,7 @@ def test_python_api_validation():
     # normal module as output module
     with pytest.raises(ValidationError) as error:
         Scanner(output_modules=["robots"])
-    assert str(error.value) == 'Could not find output module "robots". Did you mean "web_report"?'
+    assert str(error.value) == 'Could not find output module "robots". Did you mean "rabbitmq"?'
     # invalid preset type
     with pytest.raises(ValidationError) as error:
         Scanner(preset="asdf")

From 7a1fd930ccb0298367f3f8af014853a90f2fbdbf Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 25 Nov 2024 10:37:42 -0500
Subject: [PATCH 19/26] fixed conflict

---
 bbot/models/pydantic.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
index 356ab2e44c..07534937a2 100644
--- a/bbot/models/pydantic.py
+++ b/bbot/models/pydantic.py
@@ -93,7 +93,6 @@ def get_data(self):
 
 ### SCAN ###
 
-
 class Scan(BBOTBaseModel):
     id: Annotated[str, "indexed", "unique"]
     name: str
@@ -117,7 +116,6 @@ def from_scan(cls, scan):
 
 ### TARGET ###
 
-
 class Target(BBOTBaseModel):
     name: str = "Default Target"
     strict_scope: bool = False

From 6629e787325daf34da2b99c7d7ae028d8d521678 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 25 Nov 2024 10:41:51 -0500
Subject: [PATCH 20/26] fixed conflict

---
 bbot/models/pydantic.py | 1 -
 bbot/models/sql.py | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/bbot/models/pydantic.py b/bbot/models/pydantic.py
index 07534937a2..b7c5baae9b 100644
--- a/bbot/models/pydantic.py
+++ b/bbot/models/pydantic.py
@@ -50,7 +50,6 @@ def _indexed_fields(cls):
 
 ### EVENT ###
 
-
 class Event(BBOTBaseModel):
     uuid: Annotated[str, "indexed", "unique"]
     id: Annotated[str, "indexed"]

diff --git a/bbot/models/sql.py b/bbot/models/sql.py
index 82ccdb1f6f..78465511f6 100644
--- a/bbot/models/sql.py
+++ b/bbot/models/sql.py
@@ -61,7 +61,6 @@ def __eq__(self, other):
 
 ### EVENT ###
 
-
 class Event(BBOTBaseModel, table=True):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -106,7 +105,6 @@ def get_data(self):
 
 ### SCAN ###
 
-
 class Scan(BBOTBaseModel, table=True):
     id: str = Field(primary_key=True)
     name: str
@@ -121,7 +119,6 @@ class Scan(BBOTBaseModel, table=True):
 
 ### TARGET ###
 
-
 class Target(BBOTBaseModel, table=True):
     name: str = "Default Target"
     strict_scope: bool = False

From c653845895fc28325ac2170b4cdbc35bce95bd9d Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 22 Nov 2024 18:30:05 -0500
Subject: [PATCH 21/26] new module: kafka

---
 bbot/core/event/base.py | 2 +-
 bbot/modules/output/elastic.py | 14 ++-
 bbot/modules/output/kafka.py | 42 +++++++
 bbot/scanner/scanner.py | 6 +-
 .../module_tests/test_module_elastic.py | 9 +-
 .../module_tests/test_module_kafka.py | 108 ++++++++++++++++++
 6 files changed, 167 insertions(+), 14 deletions(-)
 create mode 100644 bbot/modules/output/kafka.py
 create mode 100644 bbot/test/test_step_2/module_tests/test_module_kafka.py

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index 76802dac81..05f1a91271 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -814,7 +814,7 @@ def json(self, mode="json"):
         if parent_uuid:
             j["parent_uuid"] = parent_uuid
         # tags
-        j.update({"tags": list(self.tags)})
+        j.update({"tags": sorted(self.tags)})
         # parent module
         if self.module:
             j.update({"module": str(self.module)})
diff --git a/bbot/modules/output/elastic.py b/bbot/modules/output/elastic.py
index 15bc023df8..42c331c516 100644
--- a/bbot/modules/output/elastic.py
+++ b/bbot/modules/output/elastic.py
@@ -2,6 +2,10 @@
 
 
 class Elastic(HTTP):
+    """
+    docker run -d -p 9200:9200 --name=bbot-elastic -v "$(pwd)/elastic_data:/usr/share/elasticsearch/data" -e ELASTIC_PASSWORD=bbotislife -m 1GB docker.elastic.co/elasticsearch/elasticsearch:8.16.0
+    """
+
     watched_events = ["*"]
     metadata = {
         "description": "Send scan results to Elasticsearch",
@@ -9,9 +13,9 @@ class Elastic(HTTP):
         "author": "@TheTechromancer",
     }
     options = {
-        "url": "",
+        "url": "https://localhost:9200/bbot_events/_doc",
         "username": "elastic",
-        "password": "changeme",
+        "password": "bbotislife",
         "timeout": 10,
     }
     options_desc = {
@@ -20,3 +24,9 @@ class Elastic(HTTP):
         "password": "Elastic password",
         "timeout": "HTTP timeout",
     }
+
+    async def cleanup(self):
+        # refresh the index
+        doc_regex = self.helpers.re.compile(r"/[^/]+$")
+        refresh_url = doc_regex.sub("/_refresh", self.url)
+        await self.helpers.request(refresh_url, auth=self.auth)
diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py
new file mode 100644
index 0000000000..5b2db13d60
--- /dev/null
+++ b/bbot/modules/output/kafka.py
@@ -0,0 +1,42 @@
+import json
+from aiokafka import AIOKafkaProducer
+
+from bbot.modules.output.base import BaseOutputModule
+
+
+class Kafka(BaseOutputModule):
+    watched_events = ["*"]
+    meta = {
+        "description": "Output scan data to a Kafka topic",
+        "created_date": "2024-11-17",
+        "author": "@TheTechromancer",
+    }
+    options = {
+        "bootstrap_servers": "localhost:9092",
+        "topic": "bbot_events",
+    }
+    options_desc = {
+        "bootstrap_servers": "A comma-separated list of Kafka server addresses",
+        "topic": "The Kafka topic to publish events to",
+    }
+    deps_pip = ["aiokafka~=0.12.0"]
+
+    async def setup(self):
+        self.bootstrap_servers = self.config.get("bootstrap_servers", "localhost:9092")
+        self.topic = self.config.get("topic", "bbot_events")
+        self.producer = AIOKafkaProducer(bootstrap_servers=self.bootstrap_servers)
+
+        # Start the producer
+        await self.producer.start()
+        self.verbose("Kafka producer started successfully")
+        return True
+
+    async def handle_event(self, event):
+        event_json = event.json()
+        event_data = json.dumps(event_json).encode("utf-8")
+        await self.producer.send_and_wait(self.topic, event_data)
+
+    async def cleanup(self):
+        # Stop the producer
+        await self.producer.stop()
+        self.verbose("Kafka producer stopped successfully")
diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py
index 2602fa776c..8e99f104dd 100644
--- a/bbot/scanner/scanner.py
+++ b/bbot/scanner/scanner.py
@@ -865,15 +865,15 
@@ async def _cleanup(self): if not self._cleanedup: self._cleanedup = True self.status = "CLEANING_UP" + # clean up modules + for mod in self.modules.values(): + await mod._cleanup() # clean up dns engine if self.helpers._dns is not None: await self.helpers.dns.shutdown() # clean up web engine if self.helpers._web is not None: await self.helpers.web.shutdown() - # clean up modules - for mod in self.modules.values(): - await mod._cleanup() with contextlib.suppress(Exception): self.home.rmdir() self.helpers.clean_old_scans() diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 710c22e0f0..2f8891a640 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -48,12 +48,11 @@ async def setup_before_prep(self, module_test): response.raise_for_status() break except Exception as e: - print(f"Connection failed: {e}. Retrying...", flush=True) + self.log.verbose(f"Connection failed: {e}. Retrying...", flush=True) time.sleep(0.5) # Ensure the index is empty await client.delete(f"https://localhost:9200/bbot_test_events", auth=("elastic", "bbotislife")) - print("Elasticsearch index cleaned up", flush=True) async def check(self, module_test, events): try: @@ -65,17 +64,11 @@ async def check(self, module_test, events): # Connect to Elasticsearch async with httpx.AsyncClient(verify=False) as client: - # refresh the index - await client.post(f"https://localhost:9200/bbot_test_events/_refresh", auth=("elastic", "bbotislife")) - # Fetch all events from the index response = await client.get( f"https://localhost:9200/bbot_test_events/_search?size=100", auth=("elastic", "bbotislife") ) response_json = response.json() - import json - - print(f"response: {json.dumps(response_json, indent=2)}") db_events = [hit["_source"] for hit in response_json["hits"]["hits"]] # make sure we have the same number of events diff --git a/bbot/test/test_step_2/module_tests/test_module_kafka.py b/bbot/test/test_step_2/module_tests/test_module_kafka.py new file mode 100644 index 0000000000..6a81173561 --- /dev/null +++ b/bbot/test/test_step_2/module_tests/test_module_kafka.py @@ -0,0 +1,108 @@ +import json +import asyncio +from contextlib import suppress + +from .base import ModuleTestBase + + +class TestKafka(ModuleTestBase): + config_overrides = { + "modules": { + "kafka": { + "bootstrap_servers": "localhost:9092", + "topic": "bbot_events", + } + } + } + skip_distro_tests = True + + async def setup_before_prep(self, module_test): + # Start Zookeeper + await asyncio.create_subprocess_exec( + "docker", "run", "-d", "--rm", "--name", "bbot-test-zookeeper", "-p", "2181:2181", "zookeeper:3.9" + ) + + # Wait for Zookeeper to be ready + while True: + try: + # Attempt to connect to Zookeeper with a timeout + reader, writer = await asyncio.wait_for(asyncio.open_connection("localhost", 2181), timeout=0.5) + break # Exit the loop if the connection is successful + except Exception as e: + self.log.verbose(f"Waiting for Zookeeper to be ready: {e}") + await asyncio.sleep(0.5) # Wait a bit before retrying + finally: + with suppress(Exception): + writer.close() + await writer.wait_closed() + + # Start Kafka using wurstmeister/kafka + await asyncio.create_subprocess_exec( + "docker", + "run", + "-d", + "--rm", + "--name", + "bbot-test-kafka", + "--link", + "bbot-test-zookeeper:zookeeper", + "-e", + "KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181", + "-e", + 
"KAFKA_LISTENERS=PLAINTEXT://0.0.0.0:9092", + "-e", + "KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://localhost:9092", + "-e", + "KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1", + "-p", + "9092:9092", + "wurstmeister/kafka", + ) + + from aiokafka import AIOKafkaConsumer + + # Wait for Kafka to be ready + while True: + try: + self.consumer = AIOKafkaConsumer( + "bbot_events", + bootstrap_servers="localhost:9092", + group_id="test_group", + ) + await self.consumer.start() + break # Exit the loop if the consumer starts successfully + except Exception as e: + self.log.verbose(f"Waiting for Kafka to be ready: {e}") + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + await asyncio.sleep(0.5) # Wait a bit before retrying + + async def check(self, module_test, events): + try: + events_json = [e.json() for e in events] + events_json.sort(key=lambda x: x["timestamp"]) + + # Collect events from Kafka + kafka_events = [] + async for msg in self.consumer: + event_data = json.loads(msg.value.decode("utf-8")) + kafka_events.append(event_data) + if len(kafka_events) >= len(events_json): + break + + kafka_events.sort(key=lambda x: x["timestamp"]) + + # Verify the events match + assert events_json == kafka_events, "Events do not match" + + finally: + # Clean up: Stop the Kafka consumer + if hasattr(self, "consumer") and not self.consumer._closed: + await self.consumer.stop() + # Stop Kafka and Zookeeper containers + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-kafka", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) + await asyncio.create_subprocess_exec( + "docker", "stop", "bbot-test-zookeeper", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + ) From 665e8f3f35f8f92e694becbd9eba88c31a9803d2 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 18:56:45 -0500 Subject: [PATCH 22/26] fix elastic tests --- bbot/test/test_step_2/module_tests/test_module_elastic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bbot/test/test_step_2/module_tests/test_module_elastic.py b/bbot/test/test_step_2/module_tests/test_module_elastic.py index 2f8891a640..db9f2359f7 100644 --- a/bbot/test/test_step_2/module_tests/test_module_elastic.py +++ b/bbot/test/test_step_2/module_tests/test_module_elastic.py @@ -48,7 +48,7 @@ async def setup_before_prep(self, module_test): response.raise_for_status() break except Exception as e: - self.log.verbose(f"Connection failed: {e}. Retrying...", flush=True) + self.log.verbose(f"Connection failed: {e}. 
Retrying...") time.sleep(0.5) # Ensure the index is empty @@ -117,7 +117,7 @@ async def check(self, module_test, events): auth=("elastic", "bbotislife"), params={"ignore": "400,404"}, ) - print(f"Deleted documents from index", flush=True) + self.log.verbose(f"Deleted documents from index") await asyncio.create_subprocess_exec( "docker", "stop", "bbot-test-elastic", stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) From 3a8c19444e50a8ae0c97e94940733a754ff7a20d Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:54:29 -0500 Subject: [PATCH 23/26] better error handling in module --- bbot/modules/output/kafka.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py index 5b2db13d60..0c28075450 100644 --- a/bbot/modules/output/kafka.py +++ b/bbot/modules/output/kafka.py @@ -34,7 +34,12 @@ async def setup(self): async def handle_event(self, event): event_json = event.json() event_data = json.dumps(event_json).encode("utf-8") - await self.producer.send_and_wait(self.topic, event_data) + while 1: + try: + await self.producer.send_and_wait(self.topic, event_data) + except Exception as e: + self.warning(f"Error sending event to Kafka: {e}, retrying...") + await self.helpers.sleep(1) async def cleanup(self): # Stop the producer From eb133cef4bcffa41ec9ed33b4d89d5bc5c3d8d96 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:54:39 -0500 Subject: [PATCH 24/26] better error handling in module --- bbot/modules/output/kafka.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/modules/output/kafka.py b/bbot/modules/output/kafka.py index 0c28075450..0a31e0be12 100644 --- a/bbot/modules/output/kafka.py +++ b/bbot/modules/output/kafka.py @@ -37,6 +37,7 @@ async def handle_event(self, event): while 1: try: await self.producer.send_and_wait(self.topic, event_data) + break except Exception as e: self.warning(f"Error sending event to Kafka: {e}, retrying...") await self.helpers.sleep(1) From 9d399e7283df674c330782b06413023673ed17f9 Mon Sep 17 00:00:00 2001 From: github-actions Date: Fri, 22 Nov 2024 19:58:06 -0500 Subject: [PATCH 25/26] better mongo error handling --- bbot/modules/output/mongo.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bbot/modules/output/mongo.py b/bbot/modules/output/mongo.py index 6ad16620f6..118ca82378 100644 --- a/bbot/modules/output/mongo.py +++ b/bbot/modules/output/mongo.py @@ -59,7 +59,13 @@ async def setup(self): async def handle_event(self, event): event_json = event.json() event_pydantic = Event(**event_json) - await self.events_collection.insert_one(event_pydantic.model_dump()) + while 1: + try: + await self.events_collection.insert_one(event_pydantic.model_dump()) + break + except Exception as e: + self.warning(f"Error inserting event into MongoDB: {e}, retrying...") + await self.helpers.sleep(1) if event.type == "SCAN": scan_json = Scan(**event.data_json).model_dump() From a1f367a9975529ab3f4cc83462ef97784d8e36cb Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sat, 23 Nov 2024 15:58:06 +0100 Subject: [PATCH 26/26] git add .pre-commit-config.yaml --- .gitattributes | 2 +- .gitmodules | 2 +- .pre-commit-config.yaml | 48 +++++++++++ bbot/defaults.yml | 2 +- bbot/modules/internal/cloudcheck.py | 4 +- bbot/modules/internal/dnsresolve.py | 4 +- bbot/modules/report/asn.py | 9 +- bbot/presets/kitchen-sink.yml | 2 - bbot/presets/web/dotnet-audit.yml | 1 - bbot/scanner/preset/args.py | 4 +- 
bbot/scanner/preset/preset.py | 2 +- bbot/test/test_step_1/test__module__tests.py | 1 - bbot/test/test_step_1/test_bbot_fastapi.py | 3 - bbot/test/test_step_1/test_bloom_filter.py | 1 - bbot/test/test_step_1/test_dns.py | 3 - bbot/test/test_step_1/test_engine.py | 2 - bbot/test/test_step_1/test_events.py | 2 - bbot/test/test_step_1/test_helpers.py | 1 - bbot/test/test_step_1/test_presets.py | 7 +- bbot/test/test_step_1/test_target.py | 1 - bbot/test/test_step_1/test_web.py | 2 - .../module_tests/test_module_baddns_direct.py | 6 +- .../module_tests/test_module_excavate.py | 22 ++--- .../module_tests/test_module_gowitness.py | 4 +- .../module_tests/test_module_newsletters.py | 20 ++--- .../module_tests/test_module_ntlm.py | 3 +- .../module_tests/test_module_pgp.py | 4 +- .../module_tests/test_module_smuggler.py | 26 +++--- .../module_tests/test_module_speculate.py | 4 +- .../module_tests/test_module_viewdns.py | 2 +- bbot/wordlists/devops_mutations.txt | 2 +- bbot/wordlists/ffuf_shortname_candidates.txt | 2 +- bbot/wordlists/nameservers.txt | 2 +- bbot/wordlists/paramminer_headers.txt | 2 +- bbot/wordlists/paramminer_parameters.txt | 2 +- ...aft-small-extensions-lowercase_CLEANED.txt | 2 +- bbot/wordlists/valid_url_schemes.txt | 2 +- docs/data/chord_graph/entities.json | 2 +- docs/data/chord_graph/rels.json | 2 +- docs/dev/helpers/index.md | 2 +- docs/javascripts/tablesort.min.js | 2 +- docs/modules/custom_yara_rules.md | 6 +- docs/modules/internal_modules.md | 6 +- docs/modules/nuclei.md | 10 +-- docs/release_history.md | 2 +- docs/scanning/configuration.md | 2 +- docs/scanning/index.md | 2 +- docs/scanning/output.md | 8 +- docs/scanning/presets.md | 2 +- docs/scanning/presets_list.md | 86 +++++++++---------- mkdocs.yml | 4 +- 51 files changed, 188 insertions(+), 156 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.gitattributes b/.gitattributes index 49edcb7119..00bf2637dc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -5,4 +5,4 @@ *.txt text eol=lf *.json text eol=lf *.md text eol=lf -*.sh text eol=lf \ No newline at end of file +*.sh text eol=lf diff --git a/.gitmodules b/.gitmodules index 0033a29676..c85f090f5f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "bbot/modules/playground"] path = bbot/modules/playground url = https://github.com/blacklanternsecurity/bbot-module-playground - branch = main \ No newline at end of file + branch = main diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..d6643f2ad3 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,48 @@ +# Learn more about this config here: https://pre-commit.com/ + +# To enable these pre-commit hooks run: +# `pipx install pre-commit` or `brew install pre-commit` +# Then in the project root directory run `pre-commit install` + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-added-large-files + - id: check-ast + - id: check-builtin-literals + - id: check-byte-order-marker + - id: check-case-conflict + # - id: check-docstring-first + # - id: check-executables-have-shebangs + - id: check-json + - id: check-merge-conflict + # - id: check-shebang-scripts-are-executable + - id: check-symlinks + - id: check-toml + - id: check-vcs-permalinks + - id: check-xml + # - id: check-yaml + - id: debug-statements + - id: destroyed-symlinks + # - id: detect-private-key + - id: end-of-file-fixer + - id: file-contents-sorter + - id: fix-byte-order-marker + - id: forbid-new-submodules + - id: 
forbid-submodules + - id: mixed-line-ending + - id: requirements-txt-fixer + - id: sort-simple-yaml + - id: trailing-whitespace + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.8.0 + hooks: + - id: ruff + - id: ruff-format + + - repo: https://github.com/abravalheri/validate-pyproject + rev: v0.23 + hooks: + - id: validate-pyproject diff --git a/bbot/defaults.yml b/bbot/defaults.yml index 63f5f7e68b..61638595a0 100644 --- a/bbot/defaults.yml +++ b/bbot/defaults.yml @@ -74,7 +74,7 @@ dns: web: # HTTP proxy - http_proxy: + http_proxy: # Web user-agent user_agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.2151.97 # Set the maximum number of HTTP links that can be followed in a row (0 == no spidering allowed) diff --git a/bbot/modules/internal/cloudcheck.py b/bbot/modules/internal/cloudcheck.py index 685d67f9d7..86b6130d71 100644 --- a/bbot/modules/internal/cloudcheck.py +++ b/bbot/modules/internal/cloudcheck.py @@ -57,7 +57,9 @@ async def handle_event(self, event, **kwargs): for provider in self.helpers.cloud.providers.values(): provider_name = provider.name.lower() base_kwargs = { - "parent": event, "tags": [f"{provider.provider_type}-{provider_name}"], "_provider": provider_name + "parent": event, + "tags": [f"{provider.provider_type}-{provider_name}"], + "_provider": provider_name, } # loop through the provider's regex signatures, if any for event_type, sigs in provider.signatures.items(): diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index 15facec564..bdca0ea5c3 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -306,9 +306,7 @@ def get_dns_parent(self, event): @property def emit_raw_records(self): if self._emit_raw_records is None: - watching_raw_records = any( - "RAW_DNS_RECORD" in m.get_watched_events() for m in self.scan.modules.values() - ) + watching_raw_records = any("RAW_DNS_RECORD" in m.get_watched_events() for m in self.scan.modules.values()) omitted_event_types = self.scan.config.get("omit_event_types", []) omit_raw_records = "RAW_DNS_RECORD" in omitted_event_types self._emit_raw_records = watching_raw_records or not omit_raw_records diff --git a/bbot/modules/report/asn.py b/bbot/modules/report/asn.py index 771e4b4f7f..3b3c488d15 100644 --- a/bbot/modules/report/asn.py +++ b/bbot/modules/report/asn.py @@ -207,7 +207,14 @@ async def get_asn_bgpview(self, ip): return False asns_tried.add(asn) asns.append( - {"asn": asn, "subnet": subnet, "name": name, "description": description, "country": country, "emails": emails} + { + "asn": asn, + "subnet": subnet, + "name": name, + "description": description, + "country": country, + "emails": emails, + } ) if not asns: self.debug(f'No results for "{ip}"') diff --git a/bbot/presets/kitchen-sink.yml b/bbot/presets/kitchen-sink.yml index 43057bf44a..073f480bb2 100644 --- a/bbot/presets/kitchen-sink.yml +++ b/bbot/presets/kitchen-sink.yml @@ -16,5 +16,3 @@ config: modules: baddns: enable_references: True - - diff --git a/bbot/presets/web/dotnet-audit.yml b/bbot/presets/web/dotnet-audit.yml index bbc5e201e0..b1cd8e9cac 100644 --- a/bbot/presets/web/dotnet-audit.yml +++ b/bbot/presets/web/dotnet-audit.yml @@ -19,4 +19,3 @@ config: extensions: asp,aspx,ashx,asmx,ascx telerik: exploit_RAU_crypto: True - diff --git a/bbot/scanner/preset/args.py b/bbot/scanner/preset/args.py index b4294710fa..d7b55d50c9 100644 --- a/bbot/scanner/preset/args.py +++ 
b/bbot/scanner/preset/args.py @@ -175,7 +175,9 @@ def preset_from_args(self): def create_parser(self, *args, **kwargs): kwargs.update( { - "description": "Bighuge BLS OSINT Tool", "formatter_class": argparse.RawTextHelpFormatter, "epilog": self.epilog + "description": "Bighuge BLS OSINT Tool", + "formatter_class": argparse.RawTextHelpFormatter, + "epilog": self.epilog, } ) p = argparse.ArgumentParser(*args, **kwargs) diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py index 9e67f2c803..b275cc1f72 100644 --- a/bbot/scanner/preset/preset.py +++ b/bbot/scanner/preset/preset.py @@ -967,7 +967,7 @@ def presets_table(self, include_modules=True): header = ["Preset", "Category", "Description", "# Modules"] if include_modules: header.append("Modules") - for (loaded_preset, category, preset_path, original_file) in self.all_presets.values(): + for loaded_preset, category, preset_path, original_file in self.all_presets.values(): loaded_preset = loaded_preset.bake() num_modules = f"{len(loaded_preset.scan_modules):,}" row = [loaded_preset.name, category, loaded_preset.description, num_modules] diff --git a/bbot/test/test_step_1/test__module__tests.py b/bbot/test/test_step_1/test__module__tests.py index 791e58f58a..e50f67a910 100644 --- a/bbot/test/test_step_1/test__module__tests.py +++ b/bbot/test/test_step_1/test__module__tests.py @@ -15,7 +15,6 @@ def test__module__tests(): - preset = Preset() # make sure each module has a .py file diff --git a/bbot/test/test_step_1/test_bbot_fastapi.py b/bbot/test/test_step_1/test_bbot_fastapi.py index add7ad099a..1136963a3d 100644 --- a/bbot/test/test_step_1/test_bbot_fastapi.py +++ b/bbot/test/test_step_1/test_bbot_fastapi.py @@ -17,7 +17,6 @@ def run_bbot_multiprocess(queue): def test_bbot_multiprocess(bbot_httpserver): - bbot_httpserver.expect_request("/").respond_with_data("test@blacklanternsecurity.com") queue = multiprocessing.Queue() @@ -32,12 +31,10 @@ def test_bbot_multiprocess(bbot_httpserver): def test_bbot_fastapi(bbot_httpserver): - bbot_httpserver.expect_request("/").respond_with_data("test@blacklanternsecurity.com") fastapi_process = start_fastapi_server() try: - # wait for the server to start with a timeout of 60 seconds start_time = time.time() while True: diff --git a/bbot/test/test_step_1/test_bloom_filter.py b/bbot/test/test_step_1/test_bloom_filter.py index 22ec4db323..f954bfbc6e 100644 --- a/bbot/test/test_step_1/test_bloom_filter.py +++ b/bbot/test/test_step_1/test_bloom_filter.py @@ -6,7 +6,6 @@ @pytest.mark.asyncio async def test_bloom_filter(): - def generate_random_strings(n, length=10): """Generate a list of n random strings.""" return ["".join(random.choices(string.ascii_letters + string.digits, k=length)) for _ in range(n)] diff --git a/bbot/test/test_step_1/test_dns.py b/bbot/test/test_step_1/test_dns.py index dbbfe68d65..c032b44e48 100644 --- a/bbot/test/test_step_1/test_dns.py +++ b/bbot/test/test_step_1/test_dns.py @@ -185,7 +185,6 @@ async def test_dns_resolution(bbot_scanner): @pytest.mark.asyncio async def test_wildcards(bbot_scanner): - scan = bbot_scanner("1.1.1.1") helpers = scan.helpers @@ -634,7 +633,6 @@ def custom_lookup(query, rdtype): @pytest.mark.asyncio async def test_wildcard_deduplication(bbot_scanner): - custom_lookup = """ def custom_lookup(query, rdtype): if rdtype == "TXT" and query.strip(".").endswith("evilcorp.com"): @@ -670,7 +668,6 @@ async def handle_event(self, event): @pytest.mark.asyncio async def test_dns_raw_records(bbot_scanner): - from bbot.modules.base import BaseModule class 
DummyModule(BaseModule): diff --git a/bbot/test/test_step_1/test_engine.py b/bbot/test/test_step_1/test_engine.py index dbb21246f2..653c3dcd6c 100644 --- a/bbot/test/test_step_1/test_engine.py +++ b/bbot/test/test_step_1/test_engine.py @@ -14,7 +14,6 @@ async def test_engine(): return_errored = False class TestEngineServer(EngineServer): - CMDS = { 0: "return_thing", 1: "yield_stuff", @@ -54,7 +53,6 @@ async def yield_stuff(self, n): raise class TestEngineClient(EngineClient): - SERVER_CLASS = TestEngineServer async def return_thing(self, n): diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 39be4d704b..195f08ea89 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -9,7 +9,6 @@ @pytest.mark.asyncio async def test_events(events, helpers): - scan = Scanner() await scan._prep() @@ -617,7 +616,6 @@ async def test_events(events, helpers): @pytest.mark.asyncio async def test_event_discovery_context(): - from bbot.modules.base import BaseModule scan = Scanner("evilcorp.com") diff --git a/bbot/test/test_step_1/test_helpers.py b/bbot/test/test_step_1/test_helpers.py index 16b0dc9ec5..2eb67cd13d 100644 --- a/bbot/test/test_step_1/test_helpers.py +++ b/bbot/test/test_step_1/test_helpers.py @@ -857,7 +857,6 @@ def test_liststring_invalidfnchars(helpers): # test parameter validation @pytest.mark.asyncio async def test_parameter_validation(helpers): - getparam_valid_params = { "name", "age", diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index 73fdcf23a5..5b1564f12c 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -16,7 +16,7 @@ def test_preset_descriptions(): # ensure very preset has a description preset = Preset() - for (loaded_preset, category, preset_path, original_filename) in preset.all_presets.values(): + for loaded_preset, category, preset_path, original_filename in preset.all_presets.values(): assert ( loaded_preset.description ), f'Preset "{loaded_preset.name}" at {original_filename} does not have a description.' 
@@ -68,7 +68,6 @@ def test_core(): def test_preset_yaml(clean_default_config): - import yaml preset1 = Preset( @@ -171,7 +170,6 @@ def test_preset_cache(): def test_preset_scope(): - # test target merging scan = Scanner("1.2.3.4", preset=Preset.from_dict({"target": ["evilcorp.com"]})) assert {str(h) for h in scan.preset.target.seeds.hosts} == {"1.2.3.4/32", "evilcorp.com"} @@ -378,7 +376,6 @@ def test_preset_scope(): @pytest.mark.asyncio async def test_preset_logging(): - scan = Scanner() # test individual verbosity levels @@ -711,7 +708,6 @@ class TestModule5(BaseModule): def test_preset_include(): - # test recursive preset inclusion custom_preset_dir_1 = bbot_test_dir / "custom_preset_dir" @@ -883,7 +879,6 @@ def test_preset_module_disablement(clean_default_config): def test_preset_require_exclude(): - def get_module_flags(p): for m in p.scan_modules: preloaded = p.preloaded_module(m) diff --git a/bbot/test/test_step_1/test_target.py b/bbot/test/test_step_1/test_target.py index 3c9a9832b5..8f2a6bf91f 100644 --- a/bbot/test/test_step_1/test_target.py +++ b/bbot/test/test_step_1/test_target.py @@ -337,7 +337,6 @@ async def test_target(bbot_scanner): @pytest.mark.asyncio async def test_blacklist_regex(bbot_scanner, bbot_httpserver): - from bbot.scanner.target import ScanBlacklist blacklist = ScanBlacklist("evilcorp.com") diff --git a/bbot/test/test_step_1/test_web.py b/bbot/test/test_step_1/test_web.py index dc1f50339e..e07ed3d7d4 100644 --- a/bbot/test/test_step_1/test_web.py +++ b/bbot/test/test_step_1/test_web.py @@ -6,7 +6,6 @@ @pytest.mark.asyncio async def test_web_engine(bbot_scanner, bbot_httpserver, httpx_mock): - from werkzeug.wrappers import Response def server_handler(request): @@ -134,7 +133,6 @@ def server_handler(request): @pytest.mark.asyncio async def test_web_helpers(bbot_scanner, bbot_httpserver, httpx_mock): - # json conversion scan = bbot_scanner("evilcorp.com") url = "http://www.evilcorp.com/json_test?a=b" diff --git a/bbot/test/test_step_2/module_tests/test_module_baddns_direct.py b/bbot/test/test_step_2/module_tests/test_module_baddns_direct.py index 77a86153c7..b2b49717c8 100644 --- a/bbot/test/test_step_2/module_tests/test_module_baddns_direct.py +++ b/bbot/test/test_step_2/module_tests/test_module_baddns_direct.py @@ -55,8 +55,8 @@ def set_target(self, target): def check(self, module_test, events): assert any( e.type == "FINDING" - and "Possible [AWS Bucket Takeover Detection] via direct BadDNS analysis. Indicator: [[Words: The specified bucket does not exist | Condition: and | Part: body] Matchers-Condition: and] Trigger: [self] baddns Module: [CNAME]" - in e.data["description"] - for e in events + and "Possible [AWS Bucket Takeover Detection] via direct BadDNS analysis. 
Indicator: [[Words: The specified bucket does not exist | Condition: and | Part: body] Matchers-Condition: and] Trigger: [self] baddns Module: [CNAME]" + in e.data["description"] + for e in events ), "Failed to emit FINDING" assert any("baddns-cname" in e.tags for e in events), "Failed to add baddns tag" diff --git a/bbot/test/test_step_2/module_tests/test_module_excavate.py b/bbot/test/test_step_2/module_tests/test_module_excavate.py index f5f774e380..a2ccf97613 100644 --- a/bbot/test/test_step_2/module_tests/test_module_excavate.py +++ b/bbot/test/test_step_2/module_tests/test_module_excavate.py @@ -895,7 +895,7 @@ class TestExcavateRAWTEXT(ModuleTestBase): /Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ] >> /Rotate 0 /Trans << ->> +>> /Type /Page >> endobj @@ -906,7 +906,7 @@ class TestExcavateRAWTEXT(ModuleTestBase): endobj 5 0 obj << -/Author (anonymous) /CreationDate (D:20240807182842+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20240807182842+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) +/Author (anonymous) /CreationDate (D:20240807182842+00'00') /Creator (ReportLab PDF Library - www.reportlab.com) /Keywords () /ModDate (D:20240807182842+00'00') /Producer (ReportLab PDF Library - www.reportlab.com) /Subject (unspecified) /Title (untitled) /Trapped /False >> endobj @@ -924,17 +924,17 @@ class TestExcavateRAWTEXT(ModuleTestBase): endobj xref 0 8 -0000000000 65535 f -0000000073 00000 n -0000000104 00000 n -0000000211 00000 n -0000000414 00000 n -0000000482 00000 n -0000000778 00000 n -0000000837 00000 n +0000000000 65535 f +0000000073 00000 n +0000000104 00000 n +0000000211 00000 n +0000000414 00000 n +0000000482 00000 n +0000000778 00000 n +0000000837 00000 n trailer << -/ID +/ID [<3c7340500fa2fe72523c5e6f07511599><3c7340500fa2fe72523c5e6f07511599>] % ReportLab generated PDF document -- digest (http://www.reportlab.com) diff --git a/bbot/test/test_step_2/module_tests/test_module_gowitness.py b/bbot/test/test_step_2/module_tests/test_module_gowitness.py index 2d6dc2cd8f..6090fbb1d6 100644 --- a/bbot/test/test_step_2/module_tests/test_module_gowitness.py +++ b/bbot/test/test_step_2/module_tests/test_module_gowitness.py @@ -101,6 +101,4 @@ class TestGoWitnessWithBlob(TestGowitness): def check(self, module_test, events): webscreenshots = [e for e in events if e.type == "WEBSCREENSHOT"] assert webscreenshots, "failed to raise WEBSCREENSHOT events" - assert all( - "blob" in e.data and e.data["blob"] for e in webscreenshots - ), "blob not found in WEBSCREENSHOT data" + assert all("blob" in e.data and e.data["blob"] for e in webscreenshots), "blob not found in WEBSCREENSHOT data" diff --git a/bbot/test/test_step_2/module_tests/test_module_newsletters.py b/bbot/test/test_step_2/module_tests/test_module_newsletters.py index 98210f658e..c5edd25141 100644 --- a/bbot/test/test_step_2/module_tests/test_module_newsletters.py +++ b/bbot/test/test_step_2/module_tests/test_module_newsletters.py @@ -10,16 +10,16 @@ class TestNewsletters(ModuleTestBase): modules_overrides = ["speculate", "httpx", "newsletters"] html_with_newsletter = """ - """ diff --git a/bbot/test/test_step_2/module_tests/test_module_ntlm.py b/bbot/test/test_step_2/module_tests/test_module_ntlm.py index 1e79be7705..7b834ef2f9 100644 --- a/bbot/test/test_step_2/module_tests/test_module_ntlm.py +++ b/bbot/test/test_step_2/module_tests/test_module_ntlm.py @@ -10,7 +10,8 @@ async def setup_after_prep(self, module_test): request_args = {"uri": "/", "headers": {"test": 
"header"}} module_test.set_expect_requests(request_args, {}) request_args = { - "uri": "/oab/", "headers": {"Authorization": "NTLM TlRMTVNTUAABAAAAl4II4gAAAAAAAAAAAAAAAAAAAAAKAGFKAAAADw=="} + "uri": "/oab/", + "headers": {"Authorization": "NTLM TlRMTVNTUAABAAAAl4II4gAAAAAAAAAAAAAAAAAAAAAKAGFKAAAADw=="}, } respond_args = { "headers": { diff --git a/bbot/test/test_step_2/module_tests/test_module_pgp.py b/bbot/test/test_step_2/module_tests/test_module_pgp.py index e6f122dd93..dc493d7b52 100644 --- a/bbot/test/test_step_2/module_tests/test_module_pgp.py +++ b/bbot/test/test_step_2/module_tests/test_module_pgp.py @@ -9,10 +9,10 @@ class TestPGP(ModuleTestBase):

Search results for 'blacklanternsecurity.com'

Type bits/keyID            cr. time   exp time   key expir
 
diff --git a/bbot/test/test_step_2/module_tests/test_module_smuggler.py b/bbot/test/test_step_2/module_tests/test_module_smuggler.py index dcbb9fd3b5..fb86b9ae92 100644 --- a/bbot/test/test_step_2/module_tests/test_module_smuggler.py +++ b/bbot/test/test_step_2/module_tests/test_module_smuggler.py @@ -1,13 +1,13 @@ from .base import ModuleTestBase smuggler_text = r""" - ______ _ - / _____) | | - ( (____ ____ _ _ ____ ____| | _____ ____ + ______ _ + / _____) | | + ( (____ ____ _ _ ____ ____| | _____ ____ \____ \| \| | | |/ _ |/ _ | || ___ |/ ___) - _____) ) | | | |_| ( (_| ( (_| | || ____| | - (______/|_|_|_|____/ \___ |\___ |\_)_____)_| - (_____(_____| + _____) ) | | | |_| ( (_| ( (_| | || ____| | + (______/|_|_|_|____/ \___ |\___ |\_)_____)_| + (_____(_____| @defparam v1.1 @@ -16,13 +16,13 @@ [+] Endpoint : / [+] Configfile : default.py [+] Timeout : 5.0 seconds - [+] Cookies : 1 (Appending to the attack) - [nameprefix1] : Checking TECL... - [nameprefix1] : Checking CLTE... - [nameprefix1] : OK (TECL: 0.61 - 405) (CLTE: 0.62 - 405) - [tabprefix1] : Checking TECL...git - [tabprefix1] : Checking CLTE... - [tabprefix1] : Checking TECL... + [+] Cookies : 1 (Appending to the attack) + [nameprefix1] : Checking TECL... + [nameprefix1] : Checking CLTE... + [nameprefix1] : OK (TECL: 0.61 - 405) (CLTE: 0.62 - 405) + [tabprefix1] : Checking TECL...git + [tabprefix1] : Checking CLTE... + [tabprefix1] : Checking TECL... [tabprefix1] : Checking CLTE... [tabprefix1] : Checking TECL... [tabprefix1] : Checking CLTE... diff --git a/bbot/test/test_step_2/module_tests/test_module_speculate.py b/bbot/test/test_step_2/module_tests/test_module_speculate.py index e407470346..55db777e7b 100644 --- a/bbot/test/test_step_2/module_tests/test_module_speculate.py +++ b/bbot/test/test_step_2/module_tests/test_module_speculate.py @@ -63,7 +63,7 @@ def check(self, module_test, events): events_data.add(e.data) assert all( x in events_data - for x in ("evilcorp.com:80", "evilcorp.com:443", "asdf.evilcorp.com:80", "asdf.evilcorp.com:443") + for x in ("evilcorp.com:80", "evilcorp.com:443", "asdf.evilcorp.com:80", "asdf.evilcorp.com:443") ) @@ -78,5 +78,5 @@ def check(self, module_test, events): events_data.add(e.data) assert not any( x in events_data - for x in ("evilcorp.com:80", "evilcorp.com:443", "asdf.evilcorp.com:80", "asdf.evilcorp.com:443") + for x in ("evilcorp.com:80", "evilcorp.com:443", "asdf.evilcorp.com:80", "asdf.evilcorp.com:443") ) diff --git a/bbot/test/test_step_2/module_tests/test_module_viewdns.py b/bbot/test/test_step_2/module_tests/test_module_viewdns.py index d196981ba1..e8b2fe2339 100644 --- a/bbot/test/test_step_2/module_tests/test_module_viewdns.py +++ b/bbot/test/test_step_2/module_tests/test_module_viewdns.py @@ -66,7 +66,7 @@ def check(self, module_test, events): - ViewDNS.info > Tools > + ViewDNS.info > Tools >

Reverse Whois Lookup



This free tool will allow you to find domain names owned by an individual person or company. Simply enter the email address or name of the person or company to find other domains registered using those same details. FAQ.

diff --git a/bbot/wordlists/devops_mutations.txt b/bbot/wordlists/devops_mutations.txt index bfde86c591..b3fc8deda1 100644 --- a/bbot/wordlists/devops_mutations.txt +++ b/bbot/wordlists/devops_mutations.txt @@ -105,4 +105,4 @@ store home production auto -cn \ No newline at end of file +cn diff --git a/bbot/wordlists/ffuf_shortname_candidates.txt b/bbot/wordlists/ffuf_shortname_candidates.txt index 4439d6d744..2d57ee9463 100644 --- a/bbot/wordlists/ffuf_shortname_candidates.txt +++ b/bbot/wordlists/ffuf_shortname_candidates.txt @@ -107979,4 +107979,4 @@ zzz zzzindex zzztest zzzz -zzzzz \ No newline at end of file +zzzzz diff --git a/bbot/wordlists/nameservers.txt b/bbot/wordlists/nameservers.txt index d350e56f9c..9153631946 100644 --- a/bbot/wordlists/nameservers.txt +++ b/bbot/wordlists/nameservers.txt @@ -2370,4 +2370,4 @@ 8.25.185.131 203.39.3.133 118.69.187.252 -108.56.80.135 \ No newline at end of file +108.56.80.135 diff --git a/bbot/wordlists/paramminer_headers.txt b/bbot/wordlists/paramminer_headers.txt index 53ea11e8b4..3fe2366059 100644 --- a/bbot/wordlists/paramminer_headers.txt +++ b/bbot/wordlists/paramminer_headers.txt @@ -1147,4 +1147,4 @@ http_sm_userdn http_sm_usermsg x-remote-ip traceparent -tracestate \ No newline at end of file +tracestate diff --git a/bbot/wordlists/paramminer_parameters.txt b/bbot/wordlists/paramminer_parameters.txt index 2022323fb3..501878d987 100644 --- a/bbot/wordlists/paramminer_parameters.txt +++ b/bbot/wordlists/paramminer_parameters.txt @@ -6520,4 +6520,4 @@ shell_path user_token adminCookie fullapp -LandingUrl \ No newline at end of file +LandingUrl diff --git a/bbot/wordlists/raft-small-extensions-lowercase_CLEANED.txt b/bbot/wordlists/raft-small-extensions-lowercase_CLEANED.txt index 6e2aca6506..b5f461182f 100644 --- a/bbot/wordlists/raft-small-extensions-lowercase_CLEANED.txt +++ b/bbot/wordlists/raft-small-extensions-lowercase_CLEANED.txt @@ -830,4 +830,4 @@ .z .zdat .zif -.zip \ No newline at end of file +.zip diff --git a/bbot/wordlists/valid_url_schemes.txt b/bbot/wordlists/valid_url_schemes.txt index f0a440da9b..721a854aee 100644 --- a/bbot/wordlists/valid_url_schemes.txt +++ b/bbot/wordlists/valid_url_schemes.txt @@ -379,4 +379,4 @@ xri ymsgr z39.50 z39.50r -z39.50s \ No newline at end of file +z39.50s diff --git a/docs/data/chord_graph/entities.json b/docs/data/chord_graph/entities.json index 96d3875945..88242097ed 100644 --- a/docs/data/chord_graph/entities.json +++ b/docs/data/chord_graph/entities.json @@ -2020,4 +2020,4 @@ 7 ] } -] \ No newline at end of file +] diff --git a/docs/data/chord_graph/rels.json b/docs/data/chord_graph/rels.json index 7ebca1393a..43a646026a 100644 --- a/docs/data/chord_graph/rels.json +++ b/docs/data/chord_graph/rels.json @@ -1719,4 +1719,4 @@ "target": 148, "type": "produces" } -] \ No newline at end of file +] diff --git a/docs/dev/helpers/index.md b/docs/dev/helpers/index.md index 60d64f793d..cc27ed1f2b 100644 --- a/docs/dev/helpers/index.md +++ b/docs/dev/helpers/index.md @@ -6,7 +6,7 @@ The vast majority of these helpers can be accessed directly from the `.helpers` ```python class MyModule(BaseModule): - + ... 
async def handle_event(self, event): diff --git a/docs/javascripts/tablesort.min.js b/docs/javascripts/tablesort.min.js index 65a83b1138..fcd3b078ef 100644 --- a/docs/javascripts/tablesort.min.js +++ b/docs/javascripts/tablesort.min.js @@ -3,4 +3,4 @@ * http://tristen.ca/tablesort/demo/ * Copyright (c) 2021 ; Licensed MIT */ -!function(){function a(b,c){if(!(this instanceof a))return new a(b,c);if(!b||"TABLE"!==b.tagName)throw new Error("Element must be a table");this.init(b,c||{})}var b=[],c=function(a){var b;return window.CustomEvent&&"function"==typeof window.CustomEvent?b=new CustomEvent(a):(b=document.createEvent("CustomEvent"),b.initCustomEvent(a,!1,!1,void 0)),b},d=function(a,b){return a.getAttribute(b.sortAttribute||"data-sort")||a.textContent||a.innerText||""},e=function(a,b){return a=a.trim().toLowerCase(),b=b.trim().toLowerCase(),a===b?0:a0)if(a.tHead&&a.tHead.rows.length>0){for(e=0;e0&&n.push(m),o++;if(!n)return}for(o=0;o0)if(a.tHead&&a.tHead.rows.length>0){for(e=0;e0&&n.push(m),o++;if(!n)return}for(o=0;o + ## Dependencies diff --git a/docs/scanning/output.md b/docs/scanning/output.md index dd45a5c833..66d9b1c70c 100644 --- a/docs/scanning/output.md +++ b/docs/scanning/output.md @@ -291,7 +291,7 @@ bbot -f subdomain-enum -t evilcorp.com -om neo4j ### Cypher Queries and Tips -Neo4j uses the Cypher Query Language for its graph query language. Cypher uses common clauses to craft relational queries and present the desired data in multiple formats. +Neo4j uses the Cypher Query Language for its graph query language. Cypher uses common clauses to craft relational queries and present the desired data in multiple formats. Cypher queries can be broken down into three required pieces; selection, filter, and presentation. The selection piece identifies what data that will be searched against - 90% of the time the "MATCH" clause will be enough but there are means to read from csv or json data files. In all of these examples the "MATCH" clause will be used. The filter piece helps to focus in on the required data and used the "WHERE" clause to accomplish this effort (most basic operators can be used). Finally, the presentation section identifies how the data should be presented back to the querier. While neo4j is a graph database, it can be used in a traditional table view. @@ -300,7 +300,7 @@ A simple query to grab every URL event with ".com" in the BBOT data field would In this query the following can be identified: - Within the MATCH statement "u" is a variable and can be any value needed by the user while the "URL" label is a direct relationship to the BBOT event type. -- The WHERE statement allows the query to filter on any of the BBOT event properties like data, tag, or even the label itself. +- The WHERE statement allows the query to filter on any of the BBOT event properties like data, tag, or even the label itself. - The RETURN statement is a general presentation of the whole URL event but this can be narrowed down to present any of the specific properties of the BBOT event (`RETURN u.data, u.tags`). The following are a few recommended queries to get started with: @@ -337,6 +337,6 @@ RETURN n.data, collect(distinct port) MATCH (n) DETACH DELETE n ``` -This is not an exhaustive list of clauses, filters, or other means to use cypher and should be considered a starting point. To build more advanced queries consider reading Neo4j's Cypher [documentation](https://neo4j.com/docs/cypher-manual/current/introduction/). 
+This is not an exhaustive list of clauses, filters, or other means to use cypher and should be considered a starting point. To build more advanced queries consider reading Neo4j's Cypher [documentation](https://neo4j.com/docs/cypher-manual/current/introduction/). -Additional note: these sample queries are dependent on the existence of the data in the target neo4j database. +Additional note: these sample queries are dependent on the existence of the data in the target neo4j database. diff --git a/docs/scanning/presets.md b/docs/scanning/presets.md index f68a62dc33..7fa8f8c93b 100644 --- a/docs/scanning/presets.md +++ b/docs/scanning/presets.md @@ -37,7 +37,7 @@ bbot -lp Enable them with `-p`: ```bash -# do a subdomain enumeration +# do a subdomain enumeration bbot -t evilcorp.com -p subdomain-enum # multiple presets - subdomain enumeration + web spider diff --git a/docs/scanning/presets_list.md b/docs/scanning/presets_list.md index 93e1d3c8b3..416e163c52 100644 --- a/docs/scanning/presets_list.md +++ b/docs/scanning/presets_list.md @@ -8,13 +8,13 @@ Run all baddns modules and submodules. ??? note "`baddns-thorough.yml`" ```yaml title="~/.bbot/presets/baddns-thorough.yml" description: Run all baddns modules and submodules. - - + + modules: - baddns - baddns_zone - baddns_direct - + config: modules: baddns: @@ -32,10 +32,10 @@ Enumerate cloud resources such as storage buckets, etc. ??? note "`cloud-enum.yml`" ```yaml title="~/.bbot/presets/cloud-enum.yml" description: Enumerate cloud resources such as storage buckets, etc. - + include: - subdomain-enum - + flags: - cloud-enum ``` @@ -51,7 +51,7 @@ Enumerate Git repositories, Docker images, etc. ??? note "`code-enum.yml`" ```yaml title="~/.bbot/presets/code-enum.yml" description: Enumerate Git repositories, Docker images, etc. - + flags: - code-enum ``` @@ -67,17 +67,17 @@ Recursive web directory brute-force (aggressive) ??? note "`dirbust-heavy.yml`" ```yaml title="~/.bbot/presets/web/dirbust-heavy.yml" description: Recursive web directory brute-force (aggressive) - + include: - spider - + flags: - iis-shortnames - + modules: - ffuf - wayback - + config: modules: iis_shortnames: @@ -118,13 +118,13 @@ Basic web directory brute-force (surface-level directories only) ??? note "`dirbust-light.yml`" ```yaml title="~/.bbot/presets/web/dirbust-light.yml" description: Basic web directory brute-force (surface-level directories only) - + include: - iis-shortnames - + modules: - ffuf - + config: modules: ffuf: @@ -143,11 +143,11 @@ Comprehensive scan for all IIS/.NET specific modules and module settings ??? note "`dotnet-audit.yml`" ```yaml title="~/.bbot/presets/web/dotnet-audit.yml" description: Comprehensive scan for all IIS/.NET specific modules and module settings - - + + include: - iis-shortnames - + modules: - httpx - badsecrets @@ -156,14 +156,14 @@ Comprehensive scan for all IIS/.NET specific modules and module settings - telerik - ajaxpro - dotnetnuke - + config: modules: ffuf: extensions: asp,aspx,ashx,asmx,ascx telerik: exploit_RAU_crypto: True - + ``` Category: web @@ -177,10 +177,10 @@ Enumerate email addresses from APIs, web crawling, etc. ??? note "`email-enum.yml`" ```yaml title="~/.bbot/presets/email-enum.yml" description: Enumerate email addresses from APIs, web crawling, etc. - + flags: - email-enum - + output_modules: - emails ``` @@ -196,10 +196,10 @@ Scan only the provided targets as fast as possible - no extra discovery ??? 
note "`fast.yml`" ```yaml title="~/.bbot/presets/fast.yml" description: Scan only the provided targets as fast as possible - no extra discovery - + exclude_modules: - excavate - + config: # only scan the exact targets specified scope: @@ -224,10 +224,10 @@ Recursively enumerate IIS shortnames ??? note "`iis-shortnames.yml`" ```yaml title="~/.bbot/presets/web/iis-shortnames.yml" description: Recursively enumerate IIS shortnames - + flags: - iis-shortnames - + config: modules: iis_shortnames: @@ -246,7 +246,7 @@ Everything everywhere all at once ??? note "`kitchen-sink.yml`" ```yaml title="~/.bbot/presets/kitchen-sink.yml" description: Everything everywhere all at once - + include: - subdomain-enum - cloud-enum @@ -258,13 +258,13 @@ Everything everywhere all at once - dirbust-light - web-screenshots - baddns-thorough - + config: modules: baddns: enable_references: True - - + + ``` @@ -278,13 +278,13 @@ Discover new web parameters via brute-force ??? note "`paramminer.yml`" ```yaml title="~/.bbot/presets/web/paramminer.yml" description: Discover new web parameters via brute-force - + flags: - web-paramminer - + modules: - httpx - + config: web: spider_distance: 1 @@ -302,14 +302,14 @@ Recursive web spider ??? note "`spider.yml`" ```yaml title="~/.bbot/presets/spider.yml" description: Recursive web spider - + modules: - httpx - + blacklist: # Prevent spider from invalidating sessions by logging out - "RE:/.*(sign|log)[_-]?out" - + config: web: # how many links to follow in a row @@ -331,15 +331,15 @@ Enumerate subdomains via APIs, brute-force ??? note "`subdomain-enum.yml`" ```yaml title="~/.bbot/presets/subdomain-enum.yml" description: Enumerate subdomains via APIs, brute-force - + flags: # enable every module with the subdomain-enum flag - subdomain-enum - + output_modules: # output unique subdomains to TXT file - subdomains - + config: dns: threads: 25 @@ -365,10 +365,10 @@ Quick web scan ??? note "`web-basic.yml`" ```yaml title="~/.bbot/presets/web-basic.yml" description: Quick web scan - + include: - iis-shortnames - + flags: - web-basic ``` @@ -384,10 +384,10 @@ Take screenshots of webpages ??? note "`web-screenshots.yml`" ```yaml title="~/.bbot/presets/web-screenshots.yml" description: Take screenshots of webpages - + flags: - web-screenshots - + config: modules: gowitness: @@ -410,11 +410,11 @@ Aggressive web scan ??? note "`web-thorough.yml`" ```yaml title="~/.bbot/presets/web-thorough.yml" description: Aggressive web scan - + include: # include the web-basic preset - web-basic - + flags: - web-thorough ``` diff --git a/mkdocs.yml b/mkdocs.yml index 1802fc678a..4413fac487 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -7,7 +7,7 @@ site_description: >- # Repository repo_name: blacklanternsecurity/bbot repo_url: https://github.com/blacklanternsecurity/bbot -watch: +watch: - "mkdocs.yml" - "bbot" - "docs" @@ -29,7 +29,7 @@ nav: - Tips and Tricks: scanning/tips_and_tricks.md - Advanced Usage: scanning/advanced.md - Configuration: scanning/configuration.md - - Modules: + - Modules: - List of Modules: modules/list_of_modules.md - Nuclei: modules/nuclei.md - Custom YARA Rules: modules/custom_yara_rules.md
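A closing note on driving the new queue-backed output modules from the Python API. This is a minimal sketch, not part of the patch series: `Scanner(output_modules=[...])` is confirmed by the `test_python_api.py` change in patch 18, and the option names match the `kafka` module's defaults, but the target, the `config` override structure (mirroring the tests' `config_overrides`), and the `async_start()` consumption loop are illustrative assumptions.

```python
import asyncio

from bbot.scanner import Scanner


async def main():
    # hypothetical target; kafka options mirror the module defaults above
    scan = Scanner(
        "evilcorp.com",
        output_modules=["kafka"],
        config={"modules": {"kafka": {"bootstrap_servers": "localhost:9092", "topic": "bbot_events"}}},
    )
    # each event yielded here is also published to the Kafka topic by the output module
    async for event in scan.async_start():
        print(event.type, event)


asyncio.run(main())
```

Swapping `"kafka"` for `"rabbitmq"` (with the `url`/`queue` options from patch 17) should work the same way, since both modules watch `*` and serialize every event.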
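On the consuming side, both modules publish each event as a UTF-8 JSON message built from `event.json()`. A standalone consumer sketch for the Kafka topic, using the same `aiokafka` consumer API exercised in `test_module_kafka.py`; the group id, offset policy, and printed fields are illustrative:

```python
import json
import asyncio

from aiokafka import AIOKafkaConsumer


async def consume_bbot_events():
    consumer = AIOKafkaConsumer(
        "bbot_events",  # the kafka module's default topic
        bootstrap_servers="localhost:9092",  # the module's default server
        group_id="bbot_consumers",  # example group name
        auto_offset_reset="earliest",  # pick up events published before we attached
    )
    await consumer.start()
    try:
        async for msg in consumer:
            # each message body is one JSON-serialized BBOT event
            event = json.loads(msg.value.decode("utf-8"))
            print(event["type"], event["timestamp"])
    finally:
        await consumer.stop()


asyncio.run(consume_bbot_events())
```

The RabbitMQ equivalent would use `aio_pika.connect_robust()` and the queue iterator pattern shown in `test_module_rabbitmq.py`.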