From 7db38fd334263a4d7d4cb51e8030f2fb65a07ed6 Mon Sep 17 00:00:00 2001 From: Dom Whewell Date: Sun, 15 Dec 2024 11:26:37 +0000 Subject: [PATCH] Rename to unarchive, move jar exclusions into module and restore helper --- bbot/core/helpers/libmagic.py | 2 + .../internal/{extract.py => unarchive.py} | 7 +- bbot/test/test_step_1/test_cli.py | 6 +- bbot/test/test_step_1/test_presets.py | 4 +- ...le_extract.py => test_module_unarchive.py} | 130 ++++++++++-------- 5 files changed, 86 insertions(+), 63 deletions(-) rename bbot/modules/internal/{extract.py => unarchive.py} (89%) rename bbot/test/test_step_2/module_tests/{test_module_extract.py => test_module_unarchive.py} (75%) diff --git a/bbot/core/helpers/libmagic.py b/bbot/core/helpers/libmagic.py index 535c99c8cb..37612f558e 100644 --- a/bbot/core/helpers/libmagic.py +++ b/bbot/core/helpers/libmagic.py @@ -20,7 +20,9 @@ def get_compression(mime_type): "application/fictionbook2+zip": "zip", # FictionBook 2.0 (Zip) "application/fictionbook3+zip": "zip", # FictionBook 3.0 (Zip) "application/gzip": "gzip", # Gzip compressed file + "application/java-archive": "zip", # Java Archive (JAR) "application/pak": "pak", # PAK archive + "application/vnd.android.package-archive": "zip", # Android package (APK) "application/vnd.comicbook-rar": "rar", # Comic book archive (RAR) "application/vnd.comicbook+zip": "zip", # Comic book archive (Zip) "application/vnd.ms-cab-compressed": "cab", # Microsoft Cabinet archive diff --git a/bbot/modules/internal/extract.py b/bbot/modules/internal/unarchive.py similarity index 89% rename from bbot/modules/internal/extract.py rename to bbot/modules/internal/unarchive.py index 259e038222..77ad2e2dee 100644 --- a/bbot/modules/internal/extract.py +++ b/bbot/modules/internal/unarchive.py @@ -3,10 +3,10 @@ from bbot.core.helpers.libmagic import get_magic_info, get_compression -class extract(BaseInternalModule): +class unarchive(BaseInternalModule): watched_events = ["FILESYSTEM"] produced_events = ["FILESYSTEM"] - flags = ["passive"] + flags = ["passive", "safe"] meta = { "description": "Extract different types of files into folders on the filesystem", "created_date": "2024-12-08", @@ -15,6 +15,7 @@ class extract(BaseInternalModule): deps_apt = ["7zip", "tar", "rar", "unrar", "gunzip"] async def setup(self): + self.ignore_compressions = ["application/java-archive", "application/vnd.android.package-archive"] self.compression_methods = { "zip": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"], "bzip2": ["tar", "--overwrite", "-xvjf", "{filename}", "-C", "{extract_dir}/"], @@ -29,6 +30,8 @@ async def setup(self): async def filter_event(self, event): if "file" in event.tags: + if event.data["magic_mime_type"] in self.ignore_compressions: + return False, f"Ignoring file type: {event.data['magic_mime_type']}, {event.data['path']}" if not event.data["compression"] in self.compression_methods: return False, f"Extract unable to handle file type: {event.data['compression']}, {event.data['path']}" else: diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py index 26aca10647..07fb4747a4 100644 --- a/bbot/test/test_step_1/test_cli.py +++ b/bbot/test/test_step_1/test_cli.py @@ -326,17 +326,17 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config): monkeypatch.setattr("sys.argv", ["bbot", "-y"]) result = await cli._main() assert result is True - assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,extract,speculate)" in caplog.text + assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,unarchive,speculate)" in caplog.text caplog.clear() monkeypatch.setattr("sys.argv", ["bbot", "-em", "excavate", "speculate", "-y"]) result = await cli._main() assert result is True - assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,extract)" in caplog.text + assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,unarchive)" in caplog.text caplog.clear() monkeypatch.setattr("sys.argv", ["bbot", "-c", "speculate=false", "-y"]) result = await cli._main() assert result is True - assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,extract)" in caplog.text + assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,unarchive)" in caplog.text # custom target type out, err = capsys.readouterr() diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py index 43f571e13e..3ac076b067 100644 --- a/bbot/test/test_step_1/test_presets.py +++ b/bbot/test/test_step_1/test_presets.py @@ -496,7 +496,7 @@ def test_preset_module_resolution(clean_default_config): assert set(preset.internal_modules) == { "aggregate", "excavate", - "extract", + "unarchive", "speculate", "cloudcheck", "dnsresolve", @@ -560,7 +560,7 @@ def test_preset_module_resolution(clean_default_config): "dnsresolve", "aggregate", "excavate", - "extract", + "unarchive", "txt", "httpx", "csv", diff --git a/bbot/test/test_step_2/module_tests/test_module_extract.py b/bbot/test/test_step_2/module_tests/test_module_unarchive.py similarity index 75% rename from bbot/test/test_step_2/module_tests/test_module_extract.py rename to bbot/test/test_step_2/module_tests/test_module_unarchive.py index 15d1e785b2..ca40e9e16c 100644 --- a/bbot/test/test_step_2/module_tests/test_module_extract.py +++ b/bbot/test/test_step_2/module_tests/test_module_unarchive.py @@ -4,9 +4,9 @@ from .base import ModuleTestBase -class TestExtract(ModuleTestBase): +class TestUnarchive(ModuleTestBase): targets = ["http://127.0.0.1:8888"] - modules_overrides = ["filedownload", "httpx", "excavate", "speculate", "extract"] + modules_overrides = ["filedownload", "httpx", "excavate", "speculate", "unarchive"] temp_path = Path("/tmp/.bbot_test") # Create a text file to compress @@ -52,69 +52,87 @@ async def setup_after_prep(self, module_test): """, ), ) - module_test.set_expect_requests( - dict(uri="/test.zip"), - dict( - response_data=self.zip_file.read_bytes(), - headers={"Content-Type": "application/zip"}, + ( + module_test.set_expect_requests( + dict(uri="/test.zip"), + dict( + response_data=self.zip_file.read_bytes(), + headers={"Content-Type": "application/zip"}, + ), ), - ), - module_test.set_expect_requests( - dict(uri="/test-zip.zip"), - dict( - response_data=self.zip_zip_file.read_bytes(), - headers={"Content-Type": "application/zip"}, + ) + ( + module_test.set_expect_requests( + dict(uri="/test-zip.zip"), + dict( + response_data=self.zip_zip_file.read_bytes(), + headers={"Content-Type": "application/zip"}, + ), ), - ), - module_test.set_expect_requests( - dict(uri="/test.bz2"), - dict( - response_data=self.bz2_file.read_bytes(), - headers={"Content-Type": "application/x-bzip2"}, + ) + ( + module_test.set_expect_requests( + dict(uri="/test.bz2"), + dict( + response_data=self.bz2_file.read_bytes(), + headers={"Content-Type": "application/x-bzip2"}, + ), ), - ), - module_test.set_expect_requests( - dict(uri="/test.xz"), - dict( - response_data=self.xz_file.read_bytes(), - headers={"Content-Type": "application/x-xz"}, + ) + ( + module_test.set_expect_requests( + dict(uri="/test.xz"), + dict( + response_data=self.xz_file.read_bytes(), + headers={"Content-Type": "application/x-xz"}, + ), ), - ), - module_test.set_expect_requests( - dict(uri="/test.7z"), - dict( - response_data=self.zip7_file.read_bytes(), - headers={"Content-Type": "application/x-7z-compressed"}, + ) + ( + module_test.set_expect_requests( + dict(uri="/test.7z"), + dict( + response_data=self.zip7_file.read_bytes(), + headers={"Content-Type": "application/x-7z-compressed"}, + ), ), - ), - module_test.set_expect_requests( - dict(uri="/test.rar"), - dict( - response_data=self.zip7_file.read_bytes(), - headers={"Content-Type": "application/vnd.rar"}, + ) + ( + module_test.set_expect_requests( + dict(uri="/test.rar"), + dict( + response_data=self.zip7_file.read_bytes(), + headers={"Content-Type": "application/vnd.rar"}, + ), ), - ), - module_test.set_expect_requests( - dict(uri="/test.lzma"), - dict( - response_data=self.lzma_file.read_bytes(), - headers={"Content-Type": "application/x-lzma"}, + ) + ( + module_test.set_expect_requests( + dict(uri="/test.lzma"), + dict( + response_data=self.lzma_file.read_bytes(), + headers={"Content-Type": "application/x-lzma"}, + ), ), - ), - module_test.set_expect_requests( - dict(uri="/test.tar"), - dict( - response_data=self.tar_file.read_bytes(), - headers={"Content-Type": "application/x-tar"}, + ) + ( + module_test.set_expect_requests( + dict(uri="/test.tar"), + dict( + response_data=self.tar_file.read_bytes(), + headers={"Content-Type": "application/x-tar"}, + ), ), - ), - module_test.set_expect_requests( - dict(uri="/test.tgz"), - dict( - response_data=self.tgz_file.read_bytes(), - headers={"Content-Type": "application/x-tgz"}, + ) + ( + module_test.set_expect_requests( + dict(uri="/test.tgz"), + dict( + response_data=self.tgz_file.read_bytes(), + headers={"Content-Type": "application/x-tgz"}, + ), ), - ), + ) def check(self, module_test, events): filesystem_events = [e for e in events if e.type == "FILESYSTEM"]