diff --git a/.gitignore b/.gitignore index 57ff75186f..0c6b86a341 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ __pycache__/ .coverage* +/data/ +/neo4j/ diff --git a/bbot/db/neo4j.py b/bbot/db/neo4j.py deleted file mode 100644 index 7d718d64be..0000000000 --- a/bbot/db/neo4j.py +++ /dev/null @@ -1,59 +0,0 @@ -import py2neo -import logging -from datetime import datetime - -log = logging.getLogger("bbot.db.neo4j") - -# uncomment this to enable neo4j debugging -# logging.basicConfig(level=logging.DEBUG, format="%(message)s") - - -class Neo4j: - """ - docker run --rm -p 7687:7687 -p 7474:7474 --env NEO4J_AUTH=neo4j/bbotislife neo4j - """ - - def __init__(self, uri="bolt://localhost:7687", username="neo4j", password="bbotislife"): - self.graph = py2neo.Graph(uri=uri, auth=(username, password)) - - def insert_event(self, event): - self.insert_events([event]) - - def insert_events(self, events): - event_nodes = dict() - event_list = [] - - for event in events: - event_json = event.json(mode="graph") - source_id = event_json.get("source", "") - if not source_id: - log.warning(f"Skipping event without source: {event}") - continue - event_node = self.make_node(event_json) - event_nodes[event.id] = event_node - event_list.append(event_node) - - if event_nodes: - subgraph = list(event_nodes.values())[0] - for dest_event in event_list: - module = dest_event.pop("module", "TARGET") - source_id = dest_event["source"] - source_type = source_id.split(":")[0] - try: - source_event = event_nodes[source_id] - except KeyError: - source_event = self.make_node({"type": source_type, "id": source_id}) - timestamp = datetime.fromtimestamp(dest_event.pop("timestamp")) - relation = py2neo.Relationship(source_event, module, dest_event, timestamp=timestamp) - subgraph = subgraph | relation - - self.graph.merge(subgraph) - - @staticmethod - def make_node(event): - event = dict(event) - event_type = event.pop("type") - event_node = py2neo.Node(event_type, **event) - event_node.__primarylabel__ = event_type - event_node.__primarykey__ = "id" - return event_node diff --git a/bbot/modules/output/neo4j.py b/bbot/modules/output/neo4j.py index 4ed01cefbe..2cc0835443 100644 --- a/bbot/modules/output/neo4j.py +++ b/bbot/modules/output/neo4j.py @@ -1,10 +1,27 @@ -from bbot.db.neo4j import Neo4j +from neo4j import AsyncGraphDatabase + from bbot.modules.output.base import BaseOutputModule class neo4j(BaseOutputModule): """ - docker run -p 7687:7687 -p 7474:7474 -v "$(pwd)/data/:/data/" -e NEO4J_AUTH=neo4j/bbotislife neo4j + # start Neo4j in the background with docker + docker run -d -p 7687:7687 -p 7474:7474 -v "$(pwd)/neo4j/:/data/" -e NEO4J_AUTH=neo4j/bbotislife neo4j + + # view all running docker containers + > docker ps + + # view all docker containers + > docker ps -a + + # stop a docker container + > docker stop + + # remove a docker container + > docker remove + + # start a stopped container + > docker start """ watched_events = ["*"] @@ -15,26 +32,51 @@ class neo4j(BaseOutputModule): "username": "Neo4j username", "password": "Neo4j password", } - deps_pip = ["git+https://github.com/blacklanternsecurity/py2neo"] - _batch_size = 50 + deps_pip = ["neo4j"] _preserve_graph = True async def setup(self): try: - self.neo4j = await self.scan.run_in_executor( - Neo4j, + self.driver = AsyncGraphDatabase.driver( uri=self.config.get("uri", self.options["uri"]), - username=self.config.get("username", self.options["username"]), - password=self.config.get("password", self.options["password"]), + auth=( + self.config.get("username", self.options["username"]), + self.config.get("password", self.options["password"]), + ), ) - await self.scan.run_in_executor(self.neo4j.insert_event, self.scan.root_event) + self.session = self.driver.session() + await self.handle_event(self.scan.root_event) except Exception as e: - self.warning(f"Error setting up Neo4j: {e}") - return False + return False, f"Error setting up Neo4j: {e}" return True async def handle_event(self, event): - await self.scan.run_in_executor(self.neo4j.insert_event, event) + # create events + src_id = await self.merge_event(event.get_source(), id_only=True) + dst_id = await self.merge_event(event) + # create relationship + cypher = f""" + MATCH (a) WHERE id(a) = $src_id + MATCH (b) WHERE id(b) = $dst_id + MERGE (a)-[_:{event.module}]->(b) + SET _.timestamp = $timestamp""" + await self.session.run(cypher, src_id=src_id, dst_id=dst_id, timestamp=event.timestamp) + + async def merge_event(self, event, id_only=False): + if id_only: + eventdata = {"type": event.type, "id": event.id} + else: + eventdata = event.json(mode="graph") + # we pop the timestamp because it belongs on the relationship + eventdata.pop("timestamp") + cypher = f"""MERGE (_:{event.type} {{ id: $eventdata['id'] }}) + SET _ += $eventdata + RETURN id(_)""" + # insert event + result = await self.session.run(cypher, eventdata=eventdata) + # get Neo4j id + return (await result.single()).get("id(_)") - async def handle_batch(self, *events): - await self.scan.run_in_executor(self.neo4j.insert_events, events) + async def cleanup(self): + await self.session.close() + await self.driver.close() diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py index 7170e5a70b..85af696cd5 100644 --- a/bbot/test/bbot_fixtures.py +++ b/bbot/test/bbot_fixtures.py @@ -52,25 +52,6 @@ def bbot_scanner(): return Scanner -@pytest.fixture -def neograph(monkeypatch, helpers): - helpers.depsinstaller.pip_install(["py2neo"]) - - class NeoGraph: - def __init__(self, *args, **kwargs): - pass - - def merge(self, *args, **kwargs): - return True - - import py2neo - - monkeypatch.setattr(py2neo, "Graph", NeoGraph) - from bbot.db.neo4j import Neo4j - - return Neo4j(uri="bolt://127.0.0.1:1111") - - @pytest.fixture def scan(monkeypatch, bbot_config): from bbot.scanner import Scanner diff --git a/bbot/test/test_step_1/test_scan.py b/bbot/test/test_step_1/test_scan.py index 9aa89ffe21..464f2038b8 100644 --- a/bbot/test/test_step_1/test_scan.py +++ b/bbot/test/test_step_1/test_scan.py @@ -6,7 +6,6 @@ async def test_scan( events, bbot_config, helpers, - neograph, monkeypatch, bbot_scanner, mock_dns, diff --git a/bbot/test/test_step_2/module_tests/test_module_neo4j.py b/bbot/test/test_step_2/module_tests/test_module_neo4j.py index 7a5a010631..fcb21b94bb 100644 --- a/bbot/test/test_step_2/module_tests/test_module_neo4j.py +++ b/bbot/test/test_step_2/module_tests/test_module_neo4j.py @@ -3,18 +3,35 @@ class TestNeo4j(ModuleTestBase): async def setup_before_prep(self, module_test): - # install py2neo + # install neo4j deps_pip = module_test.preloaded["neo4j"]["deps"]["pip"] await module_test.scan.helpers.depsinstaller.pip_install(deps_pip) - class MockGraph: + self.neo4j_used = False + + class MockResult: + async def single(s): + self.neo4j_used = True + return {"id(_)": 1} + + class MockSession: + async def run(s, *args, **kwargs): + return MockResult() + + async def close(self): + pass + + class MockDriver: def __init__(self, *args, **kwargs): - self.used = False + pass + + def session(self, *args, **kwargs): + return MockSession() - def merge(self, *args, **kwargs): - self.used = True + async def close(self): + pass - module_test.monkeypatch.setattr("py2neo.Graph", MockGraph) + module_test.monkeypatch.setattr("neo4j.AsyncGraphDatabase.driver", MockDriver) def check(self, module_test, events): - assert module_test.scan.modules["neo4j"].neo4j.graph.used == True + assert self.neo4j_used == True