From 8db576fd38ab0c9ce2d91f3b844bc5de4a658495 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 7 Nov 2024 05:38:29 -0500 Subject: [PATCH 1/3] add native filetype + compression detection --- bbot/core/event/base.py | 16 +++++++++++++- bbot/test/test_step_1/test_events.py | 31 ++++++++++++++++++++++++++++ poetry.lock | 13 +++++++++++- pyproject.toml | 1 + 4 files changed, 59 insertions(+), 2 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 5a02213ce..2ee075d08 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1540,7 +1540,21 @@ def _pretty_string(self): class FILESYSTEM(DictPathEvent): - pass + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # detect type of file content using magic + from bbot.core.helpers.libmagic import get_magic_info, get_compression + extension, mime_type, description, confidence = get_magic_info(self.data["path"]) + self.data["magic_extension"] = extension + self.data["magic_mime_type"] = mime_type + self.data["magic_description"] = description + self.data["magic_confidence"] = confidence + # detection compression + compression = get_compression(mime_type) + if compression: + self.add_tag("compressed") + self.add_tag(f"{compression}-archive") + self.data["compression"] = compression class RAW_DNS_RECORD(DictHostEvent, DnsEvent): diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 00ce75ff8..989fbcfb4 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -925,3 +925,34 @@ def test_event_closest_host(): vuln = scan.make_event( {"path": "/tmp/asdf.txt", "description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3 ) + +def test_event_magic(): + from bbot.core.helpers.libmagic import get_magic_info, get_compression + + import base64 + zip_base64 = "UEsDBAoDAAAAAOMmZ1lR4FaHBQAAAAUAAAAIAAAAYXNkZi50eHRhc2RmClBLAQI/AwoDAAAAAOMmZ1lR4FaHBQAAAAUAAAAIACQAAAAAAAAAIICkgQAAAABhc2RmLnR4dAoAIAAAAAAAAQAYAICi2B77MNsBgKLYHvsw2wGAotge+zDbAVBLBQYAAAAAAQABAFoAAAArAAAAAAA=" + zip_bytes = base64.b64decode(zip_base64) + zip_file = Path("/tmp/.bbottestzipasdkfjalsdf.zip") + with open(zip_file, "wb") as f: + f.write(zip_bytes) + + # test magic helpers + extension, mime_type, description, confidence = get_magic_info(zip_file) + assert extension == ".zip" + assert mime_type == "application/zip" + assert description == "PKZIP Archive file" + assert confidence > 0 + assert get_compression(mime_type) == "zip" + + # test filesystem event + scan = Scanner() + event = scan.make_event({"path": zip_file}, "FILESYSTEM", parent=scan.root_event) + assert event.data["magic_extension"] == ".zip" + assert event.data["magic_mime_type"] == "application/zip" + assert event.data["magic_description"] == "PKZIP Archive file" + assert event.data["magic_confidence"] > 0 + assert event.data["compression"] == "zip" + assert "compressed" in event.tags + assert "zip-archive" in event.tags + + zip_file.unlink() diff --git a/poetry.lock b/poetry.lock index 7bfa9312e..fb5a3e77e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1684,6 +1684,17 @@ files = [ {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, ] +[[package]] +name = "puremagic" +version = "1.28" +description = "Pure python implementation of magic file detection" +optional = false +python-versions = "*" +files = [ + {file = "puremagic-1.28-py3-none-any.whl", hash = "sha256:e16cb9708ee2007142c37931c58f07f7eca956b3472489106a7245e5c3aa1241"}, + {file = "puremagic-1.28.tar.gz", hash = "sha256:195893fc129657f611b86b959aab337207d6df7f25372209269ed9e303c1a8c0"}, +] + [[package]] name = "pycodestyle" version = "2.12.1" @@ -3079,4 +3090,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "b4905507ecbd1cd63d17aa42e3fba1df0c5c7da0d3e30594b0f4cae9b2bb0191" +content-hash = "841284005c611ae3f1e12e32162183efbd643ba634277a41b93c74e98881c032" diff --git a/pyproject.toml b/pyproject.toml index 383b81262..137497001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,6 +55,7 @@ setproctitle = "^1.3.3" yara-python = "^4.5.1" pyzmq = "^26.0.3" httpx = "^0.27.0" +puremagic = "^1.28" [tool.poetry.group.dev.dependencies] flake8 = ">=6,<8" From 5f5349e3d57943f075c5dd7ceb23168afa794c76 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 7 Nov 2024 05:39:39 -0500 Subject: [PATCH 2/3] magic --- bbot/core/event/base.py | 1 + bbot/core/helpers/libmagic.py | 68 ++++++++++++++++++++++++++++ bbot/test/test_step_1/test_events.py | 2 + 3 files changed, 71 insertions(+) create mode 100644 bbot/core/helpers/libmagic.py diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 2ee075d08..329a69dc1 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -1544,6 +1544,7 @@ def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) # detect type of file content using magic from bbot.core.helpers.libmagic import get_magic_info, get_compression + extension, mime_type, description, confidence = get_magic_info(self.data["path"]) self.data["magic_extension"] = extension self.data["magic_mime_type"] = mime_type diff --git a/bbot/core/helpers/libmagic.py b/bbot/core/helpers/libmagic.py new file mode 100644 index 000000000..77a9eebce --- /dev/null +++ b/bbot/core/helpers/libmagic.py @@ -0,0 +1,68 @@ +import puremagic + + +def get_magic_info(file): + + magic_detections = puremagic.magic_file(file) + if magic_detections: + magic_detections.sort(key=lambda x: x.confidence, reverse=True) + detection = magic_detections[0] + return detection.extension, detection.mime_type, detection.name, detection.confidence + return "", "", "", 0 + + +def get_compression(mime_type): + mime_type = mime_type.lower() + # from https://github.com/cdgriffith/puremagic/blob/master/puremagic/magic_data.json + compression_map = { + "application/gzip": "gzip", # Gzip compressed file + "application/zip": "zip", # Zip archive + "application/x-bzip2": "bzip2", # Bzip2 compressed file + "application/x-xz": "xz", # XZ compressed file + "application/x-7z-compressed": "7z", # 7-Zip archive + "application/vnd.rar": "rar", # RAR archive + "application/x-lzma": "lzma", # LZMA compressed file + "application/x-compress": "compress", # Unix compress file + "application/zstd": "zstd", # Zstandard compressed file + "application/x-lz4": "lz4", # LZ4 compressed file + "application/x-tar": "tar", # Tar archive + "application/x-zip-compressed-fb2": "zip", # Zip archive (FB2) + "application/epub+zip": "zip", # EPUB book (Zip archive) + "application/pak": "pak", # PAK archive + "application/x-lha": "lha", # LHA archive + "application/arj": "arj", # ARJ archive + "application/vnd.ms-cab-compressed": "cab", # Microsoft Cabinet archive + "application/x-sit": "sit", # StuffIt archive + "application/binhex": "binhex", # BinHex encoded file + "application/x-lrzip": "lrzip", # Long Range ZIP + "application/x-alz": "alz", # ALZip archive + "application/x-tgz": "tgz", # Gzip compressed Tar archive + "application/x-gzip": "gzip", # Gzip compressed file + "application/x-lzip": "lzip", # Lzip compressed file + "application/x-zstd-compressed-tar": "zstd", # Zstandard compressed Tar archive + "application/x-lz4-compressed-tar": "lz4", # LZ4 compressed Tar archive + "application/vnd.comicbook+zip": "zip", # Comic book archive (Zip) + "application/vnd.palm": "palm", # Palm OS data + "application/fictionbook2+zip": "zip", # FictionBook 2.0 (Zip) + "application/fictionbook3+zip": "zip", # FictionBook 3.0 (Zip) + "application/x-cpio": "cpio", # CPIO archive + "application/x-java-pack200": "pack200", # Java Pack200 archive + "application/x-par2": "par2", # PAR2 recovery file + "application/x-rar-compressed": "rar", # RAR archive + "application/java-archive": "zip", # Java Archive (JAR) + "application/x-webarchive": "zip", # Web archive (Zip) + "application/vnd.android.package-archive": "zip", # Android package (APK) + "application/x-itunes-ipa": "zip", # iOS application archive (IPA) + "application/x-stuffit": "sit", # StuffIt archive + "application/x-archive": "ar", # Unix archive + "application/x-qpress": "qpress", # Qpress archive + "application/x-xar": "xar", # XAR archive + "application/x-ace": "ace", # ACE archive + "application/x-zoo": "zoo", # Zoo archive + "application/x-arc": "arc", # ARC archive + "application/x-zstd-compressed-tar": "zstd", # Zstandard compressed Tar archive + "application/x-lz4-compressed-tar": "lz4", # LZ4 compressed Tar archive + "application/vnd.comicbook-rar": "rar", # Comic book archive (RAR) + } + + return compression_map.get(mime_type, "") diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index 989fbcfb4..df6307429 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -926,10 +926,12 @@ def test_event_closest_host(): {"path": "/tmp/asdf.txt", "description": "test", "severity": "HIGH"}, "VULNERABILITY", parent=event3 ) + def test_event_magic(): from bbot.core.helpers.libmagic import get_magic_info, get_compression import base64 + zip_base64 = "UEsDBAoDAAAAAOMmZ1lR4FaHBQAAAAUAAAAIAAAAYXNkZi50eHRhc2RmClBLAQI/AwoDAAAAAOMmZ1lR4FaHBQAAAAUAAAAIACQAAAAAAAAAIICkgQAAAABhc2RmLnR4dAoAIAAAAAAAAQAYAICi2B77MNsBgKLYHvsw2wGAotge+zDbAVBLBQYAAAAAAQABAFoAAAArAAAAAAA=" zip_bytes = base64.b64decode(zip_base64) zip_file = Path("/tmp/.bbottestzipasdkfjalsdf.zip") From 1cb32e29b93f4dcfcf5f491233ccc30bc9cd0ace Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 7 Nov 2024 05:50:45 -0500 Subject: [PATCH 3/3] fix tests --- bbot/core/event/base.py | 53 +++++++++++++++------------- bbot/test/test_step_1/test_events.py | 24 ++++++++----- 2 files changed, 44 insertions(+), 33 deletions(-) diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py index 329a69dc1..30089dcc0 100644 --- a/bbot/core/event/base.py +++ b/bbot/core/event/base.py @@ -503,12 +503,13 @@ def scope_distance(self, scope_distance): for t in list(self.tags): if t.startswith("distance-"): self.remove_tag(t) - if scope_distance == 0: - self.add_tag("in-scope") - self.remove_tag("affiliate") - else: - self.remove_tag("in-scope") - self.add_tag(f"distance-{new_scope_distance}") + if self.host: + if scope_distance == 0: + self.add_tag("in-scope") + self.remove_tag("affiliate") + else: + self.remove_tag("in-scope") + self.add_tag(f"distance-{new_scope_distance}") self._scope_distance = new_scope_distance # apply recursively to parent events parent_scope_distance = getattr(self.parent, "scope_distance", None) @@ -1018,20 +1019,21 @@ def __init__(self, *args, **kwargs): class DictPathEvent(DictEvent): def sanitize_data(self, data): new_data = dict(data) + new_data["path"] = str(new_data["path"]) file_blobs = getattr(self.scan, "_file_blobs", False) folder_blobs = getattr(self.scan, "_folder_blobs", False) blob = None try: - data_path = Path(data["path"]) - if data_path.is_file(): + self._data_path = Path(data["path"]) + if self._data_path.is_file(): self.add_tag("file") if file_blobs: - with open(data_path, "rb") as file: + with open(self._data_path, "rb") as file: blob = file.read() - elif data_path.is_dir(): + elif self._data_path.is_dir(): self.add_tag("folder") if folder_blobs: - blob = self._tar_directory(data_path) + blob = self._tar_directory(self._data_path) except KeyError: pass if blob: @@ -1542,20 +1544,21 @@ def _pretty_string(self): class FILESYSTEM(DictPathEvent): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - # detect type of file content using magic - from bbot.core.helpers.libmagic import get_magic_info, get_compression - - extension, mime_type, description, confidence = get_magic_info(self.data["path"]) - self.data["magic_extension"] = extension - self.data["magic_mime_type"] = mime_type - self.data["magic_description"] = description - self.data["magic_confidence"] = confidence - # detection compression - compression = get_compression(mime_type) - if compression: - self.add_tag("compressed") - self.add_tag(f"{compression}-archive") - self.data["compression"] = compression + if self._data_path.is_file(): + # detect type of file content using magic + from bbot.core.helpers.libmagic import get_magic_info, get_compression + + extension, mime_type, description, confidence = get_magic_info(self.data["path"]) + self.data["magic_extension"] = extension + self.data["magic_mime_type"] = mime_type + self.data["magic_description"] = description + self.data["magic_confidence"] = confidence + # detection compression + compression = get_compression(mime_type) + if compression: + self.add_tag("compressed") + self.add_tag(f"{compression}-archive") + self.data["compression"] = compression class RAW_DNS_RECORD(DictHostEvent, DnsEvent): diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py index df6307429..1ebb38fea 100644 --- a/bbot/test/test_step_1/test_events.py +++ b/bbot/test/test_step_1/test_events.py @@ -946,15 +946,23 @@ def test_event_magic(): assert confidence > 0 assert get_compression(mime_type) == "zip" - # test filesystem event + # test filesystem event - file scan = Scanner() event = scan.make_event({"path": zip_file}, "FILESYSTEM", parent=scan.root_event) - assert event.data["magic_extension"] == ".zip" - assert event.data["magic_mime_type"] == "application/zip" - assert event.data["magic_description"] == "PKZIP Archive file" - assert event.data["magic_confidence"] > 0 - assert event.data["compression"] == "zip" - assert "compressed" in event.tags - assert "zip-archive" in event.tags + assert event.data == { + "path": "/tmp/.bbottestzipasdkfjalsdf.zip", + "magic_extension": ".zip", + "magic_mime_type": "application/zip", + "magic_description": "PKZIP Archive file", + "magic_confidence": 0.9, + "compression": "zip", + } + assert event.tags == {"file", "zip-archive", "compressed"} + + # test filesystem event - folder + scan = Scanner() + event = scan.make_event({"path": "/tmp"}, "FILESYSTEM", parent=scan.root_event) + assert event.data == {"path": "/tmp"} + assert event.tags == {"folder"} zip_file.unlink()