From 57622c23bc553396aef9165452a62b29aa5003ec Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Mon, 4 Nov 2024 10:59:27 +0000
Subject: [PATCH 01/29] Added an internal module to always extract different
 file types

---
 bbot/modules/internal/extract.py              | 120 ++++++++++++++++++
 .../module_tests/test_module_extract.py       |  89 +++++++++++++
 2 files changed, 209 insertions(+)
 create mode 100644 bbot/modules/internal/extract.py
 create mode 100644 bbot/test/test_step_2/module_tests/test_module_extract.py

diff --git a/bbot/modules/internal/extract.py b/bbot/modules/internal/extract.py
new file mode 100644
index 0000000000..032b1399a5
--- /dev/null
+++ b/bbot/modules/internal/extract.py
@@ -0,0 +1,120 @@
+import zipfile
+
+from pathlib import Path
+from subprocess import CalledProcessError
+from bbot.modules.internal.base import BaseInternalModule
+
+
+class extract(BaseInternalModule):
+    watched_events = ["FILESYSTEM"]
+    produced_events = ["FILESYSTEM"]
+    flags = ["passive"]
+    meta = {
+        "description": "Extract different types of files into folders on the filesystem",
+        "created_date": "2024-11-04",
+        "author": "@domwhewell-sage",
+    }
+    options = {
+        "threads": 4,
+    }
+    options_desc = {
+        "threads": "Maximum jadx threads for extracting apk's, default: 4",
+    }
+    deps_ansible = [
+        {
+            "name": "Install latest JRE (Debian)",
+            "package": {"name": ["default-jre"], "state": "present"},
+            "become": True,
+            "when": "ansible_facts['os_family'] == 'Debian'",
+        },
+        {
+            "name": "Install latest JRE (Arch)",
+            "package": {"name": ["jre-openjdk"], "state": "present"},
+            "become": True,
+            "when": "ansible_facts['os_family'] == 'Archlinux'",
+        },
+        {
+            "name": "Install latest JRE (Fedora)",
+            "package": {"name": ["java-openjdk-headless"], "state": "present"},
+            "become": True,
+            "when": "ansible_facts['os_family'] == 'RedHat'",
+        },
+        {
+            "name": "Install latest JRE (Alpine)",
+            "package": {"name": ["openjdk11"], "state": "present"},
+            "become": True,
+            "when": "ansible_facts['os_family'] == 'Alpine'",
+        },
+        {
+            "name": "Download jadx",
+            "unarchive": {
+                "src": "https://github.com/skylot/jadx/releases/download/v1.5.0/jadx-1.5.0.zip",
+                "include": "bin/jadx",
+                "dest": "#{BBOT_TOOLS}",
+                "remote_src": True,
+            },
+        },
+    ]
+
+    zipcompressed = ["doc", "dot", "docm", "docx", "ppt", "pptm", "pptx", "xls", "xlt", "xlsm", "xlsx", "zip"]
+    jadx = ["xapk", "apk"]
+    allowed_extensions = zipcompressed + jadx
+
+    async def setup(self):
+        self.threads = self.config.get("threads", 4)
+        return True
+
+    async def filter_event(self, event):
+        if "file" in event.tags:
+            if not any(event.data["path"].endswith(f".{ext}") for ext in self.allowed_extensions):
+                return False, "Extract unable to handle file type"
+        else:
+            return False, "Event is not a file"
+        return True
+
+    async def handle_event(self, event):
+        path = Path(event.data["path"])
+        extension = path.suffix.strip(".").lower()
+        output_dir = path.parent / path.name.replace(".", "_")
+        self.helpers.mkdir(output_dir)
+
+        # Use the appropriate extraction method based on the file type
+        self.info(f"Extracting {path} to {output_dir}")
+        if extension in self.zipcompressed:
+            success = self.extract_zip_file(path, output_dir)
+        elif extension in self.jadx:
+            success = await self.decompile_apk(path, output_dir)
+
+        # If the extraction was successful, emit the event
+        if success:
+            await self.emit_event(
+                {"path": str(output_dir)},
+                "FILESYSTEM",
+                tags="folder",
+                parent=event,
+                context=f'extracted "{path}" to: {output_dir}',
+            )
+        else:
+            output_dir.rmdir()
+
+    def extract_zip_file(self, path, output_dir):
+        try:
+            with zipfile.ZipFile(path, "r") as zip_ref:
+                zip_ref.extractall(output_dir)
+        except Exception as e:
+            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+            return False
+        return True
+
+    async def decompile_apk(self, path, output_dir):
+        command = ["jadx", "--threads-count", self.threads, "--output-dir", str(output_dir), str(path)]
+        try:
+            output = await self.run_process(command, check=True)
+        except CalledProcessError as e:
+            self.warning(f"Error decompiling {path}. STDERR: {repr(e.stderr)}")
+            return False
+        if not Path(output_dir / "resources").exists() and not Path(output_dir / "sources").exists():
+            self.warning(f"JADX was unable to decompile {path}.")
+            self.warning(output)
+            return False
+        return True
diff --git a/bbot/test/test_step_2/module_tests/test_module_extract.py b/bbot/test/test_step_2/module_tests/test_module_extract.py
new file mode 100644
index 0000000000..15eedbc03e
--- /dev/null
+++ b/bbot/test/test_step_2/module_tests/test_module_extract.py
@@ -0,0 +1,89 @@
+import zipfile
+
+from pathlib import Path
+from .base import ModuleTestBase, tempapkfile
+
+
+class TestExtractZip(ModuleTestBase):
+    targets = ["http://127.0.0.1:8888"]
+    modules_overrides = ["filedownload", "httpx", "excavate", "speculate", "extract"]
+
+    temp_path = Path("/tmp/.bbot_test")
+    zip_file = temp_path / "test.zip"
+    with zipfile.ZipFile(zip_file, "w") as z:
+        z.writestr("test.txt", "This is a test file")
+
+    async def setup_after_prep(self, module_test):
+        module_test.set_expect_requests(
+            dict(uri="/"),
+            dict(
+                response_data='<a href="/test.zip"/>',
+            ),
+        )
+        module_test.set_expect_requests(
+            dict(uri="/test.zip"),
+            dict(
+                response_data=self.zip_file.read_bytes(),
+                headers={"Content-Type": "application/zip"},
+            ),
+        )
+
+    def check(self, module_test, events):
+        filesystem_events = [e for e in events if e.type == "FILESYSTEM"]
+
+        zip_file_event = [e for e in filesystem_events if "test.zip" in e.data["path"]]
+        assert 1 == len(zip_file_event), "No zip file found"
+        file = Path(zip_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test_zip" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract zip"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination folder doesn't exist"
+
+
+class TestExtractApk(ModuleTestBase):
+    modules_overrides = ["apkpure", "google_playstore", "speculate", "extract"]
+    apk_file = tempapkfile()
+
+    async def setup_after_prep(self, module_test):
+        await module_test.mock_dns({"blacklanternsecurity.com": {"A": ["127.0.0.99"]}})
+        module_test.httpx_mock.add_response(
+            url="https://play.google.com/store/search?q=blacklanternsecurity&c=apps",
+            text="""<!DOCTYPE html>
+            <html>
+            <head>
+            <title>"blacklanternsecurity" - Android Apps on Google Play</title>
+            </head>
+            <body>
+            <a href="/store/apps/details?id=com.bbot.test&pcampaignid=dontmatchme&pli=1"/>
+            </body>
+            </html>""",
+        )
+        module_test.httpx_mock.add_response(
+            url="https://play.google.com/store/apps/details?id=com.bbot.test",
+            text="""<!DOCTYPE html>
+            <html>
+            <head>
+            <title>BBOT</title>
+            </head>
+            <body>
+            <meta name="appstore:developer_url" content="https://www.blacklanternsecurity.com">
+            </div>
+            </div>
+            </body>
+            </html>""",
+        )
+        module_test.httpx_mock.add_response(
+            url="https://d.apkpure.com/b/XAPK/com.bbot.test?version=latest",
+            content=self.apk_file,
+        )
+
+    def check(self, module_test, events):
+        extract_event = [
+            e
+            for e in events
+            if e.type == "FILESYSTEM" and "com_bbot_test_xapk" in e.data["path"] and "folder" in e.tags
+        ]
+        assert 1 == len(extract_event), "Failed to extract apk"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination apk doesn't exist"

From 2665bd93d530411419f63afc56e2ac3e4f39b75b Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Wed, 6 Nov 2024 19:16:07 +0000
Subject: [PATCH 02/29] Fix some tests

---
 bbot/test/test_step_1/test_cli.py     |  6 +++---
 bbot/test/test_step_1/test_presets.py | 10 +++++++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py
index f34b7c1474..e689c39740 100644
--- a/bbot/test/test_step_1/test_cli.py
+++ b/bbot/test/test_step_1/test_cli.py
@@ -314,17 +314,17 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config):
     monkeypatch.setattr("sys.argv", ["bbot", "-y"])
     result = await cli._main()
     assert result == True
-    assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,speculate)" in caplog.text
+    assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,extract,speculate)" in caplog.text
     caplog.clear()
     monkeypatch.setattr("sys.argv", ["bbot", "-em", "excavate", "speculate", "-y"])
     result = await cli._main()
     assert result == True
-    assert "Loaded 3/3 internal modules (aggregate,cloudcheck,dnsresolve)" in caplog.text
+    assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,extract)" in caplog.text
     caplog.clear()
     monkeypatch.setattr("sys.argv", ["bbot", "-c", "speculate=false", "-y"])
     result = await cli._main()
     assert result == True
-    assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,excavate)" in caplog.text
+    assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,extract)" in caplog.text
 
     # custom target type
     out, err = capsys.readouterr()
diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py
index ede53b632c..e93728d962 100644
--- a/bbot/test/test_step_1/test_presets.py
+++ b/bbot/test/test_step_1/test_presets.py
@@ -483,7 +483,14 @@ def test_preset_module_resolution(clean_default_config):
     # make sure we have the expected defaults
     assert not preset.scan_modules
     assert set(preset.output_modules) == {"python", "csv", "txt", "json"}
-    assert set(preset.internal_modules) == {"aggregate", "excavate", "speculate", "cloudcheck", "dnsresolve"}
+    assert set(preset.internal_modules) == {
+        "aggregate",
+        "excavate",
+        "extract",
+        "speculate",
+        "cloudcheck",
+        "dnsresolve",
+    }
     assert preset.modules == set(preset.output_modules).union(set(preset.internal_modules))
 
     # make sure dependency resolution works as expected
@@ -543,6 +550,7 @@ def test_preset_module_resolution(clean_default_config):
         "dnsresolve",
         "aggregate",
         "excavate",
+        "extract",
         "txt",
         "httpx",
         "csv",

From f329ecb9172dc032f4b6c32ba9c0d85f3d7500bc Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Wed, 6 Nov 2024 21:12:43 +0000
Subject: [PATCH 03/29] Add `extra_opts` to ansible unarchive

---
 bbot/modules/internal/extract.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bbot/modules/internal/extract.py b/bbot/modules/internal/extract.py
index 032b1399a5..562b821c05 100644
--- a/bbot/modules/internal/extract.py
+++ b/bbot/modules/internal/extract.py
@@ -51,6 +51,7 @@ class extract(BaseInternalModule):
                 "src": "https://github.com/skylot/jadx/releases/download/v1.5.0/jadx-1.5.0.zip",
                 "include": "bin/jadx",
                 "dest": "#{BBOT_TOOLS}",
+                "extra_opts": "-j",
                 "remote_src": True,
             },
         },

From 95b4cbb57b2b13183b35c58cb9b05b823ea14887 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Wed, 6 Nov 2024 22:10:42 +0000
Subject: [PATCH 04/29] Ugh have to include the libs

---
 bbot/modules/internal/extract.py | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/bbot/modules/internal/extract.py b/bbot/modules/internal/extract.py
index 562b821c05..bc388983f5 100644
--- a/bbot/modules/internal/extract.py
+++ b/bbot/modules/internal/extract.py
@@ -45,13 +45,16 @@ class extract(BaseInternalModule):
             "become": True,
             "when": "ansible_facts['os_family'] == 'Alpine'",
         },
+        {
+            "name": "Create jadx directory",
+            "file": {"path": "#{BBOT_TOOLS}/jadx", "state": "directory", "mode": "0755"},
+        },
         {
             "name": "Download jadx",
             "unarchive": {
                 "src": "https://github.com/skylot/jadx/releases/download/v1.5.0/jadx-1.5.0.zip",
-                "include": "bin/jadx",
-                "dest": "#{BBOT_TOOLS}",
-                "extra_opts": "-j",
+                "include": ["lib/jadx-1.5.0-all.jar", "bin/jadx"],
+                "dest": "#{BBOT_TOOLS}/jadx",
                 "remote_src": True,
             },
         },
@@ -108,7 +111,14 @@ def extract_zip_file(self, path, output_dir):
         return True
 
     async def decompile_apk(self, path, output_dir):
-        command = ["jadx", "--threads-count", self.threads, "--output-dir", str(output_dir), str(path)]
+        command = [
+            f"{self.scan.helpers.tools_dir}/jadx/bin/jadx",
+            "--threads-count",
+            self.threads,
+            "--output-dir",
+            str(output_dir),
+            str(path),
+        ]
         try:
             output = await self.run_process(command, check=True)
         except CalledProcessError as e:

From f72315fda447c059bc9ecb015e349adca0cc0933 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Tue, 12 Nov 2024 10:09:36 +0000
Subject: [PATCH 05/29] Add a map of the different compression types, comment
 them until a test has been written

---
 bbot/modules/internal/extract.py              | 409 ++++++++++++++----
 .../module_tests/test_module_extract.py       |  50 +--
 2 files changed, 334 insertions(+), 125 deletions(-)

diff --git a/bbot/modules/internal/extract.py b/bbot/modules/internal/extract.py
index bc388983f5..37ea82d546 100644
--- a/bbot/modules/internal/extract.py
+++ b/bbot/modules/internal/extract.py
@@ -1,7 +1,16 @@
 import zipfile
 
+# import bz2
+# import lzma
+# import expak
+# import tarfile
+# import rarfile
+# import py7zr
+# import zstandard as zstd
+# import lz4.frame
+# import shutil
+
 from pathlib import Path
-from subprocess import CalledProcessError
 from bbot.modules.internal.base import BaseInternalModule
 
 
@@ -14,80 +23,62 @@ class extract(BaseInternalModule):
         "created_date": "2024-11-04",
         "author": "@domwhewell-sage",
     }
-    options = {
-        "threads": 4,
-    }
-    options_desc = {
-        "threads": "Maximum jadx threads for extracting apk's, default: 4",
-    }
-    deps_ansible = [
-        {
-            "name": "Install latest JRE (Debian)",
-            "package": {"name": ["default-jre"], "state": "present"},
-            "become": True,
-            "when": "ansible_facts['os_family'] == 'Debian'",
-        },
-        {
-            "name": "Install latest JRE (Arch)",
-            "package": {"name": ["jre-openjdk"], "state": "present"},
-            "become": True,
-            "when": "ansible_facts['os_family'] == 'Archlinux'",
-        },
-        {
-            "name": "Install latest JRE (Fedora)",
-            "package": {"name": ["java-openjdk-headless"], "state": "present"},
-            "become": True,
-            "when": "ansible_facts['os_family'] == 'RedHat'",
-        },
-        {
-            "name": "Install latest JRE (Alpine)",
-            "package": {"name": ["openjdk11"], "state": "present"},
-            "become": True,
-            "when": "ansible_facts['os_family'] == 'Alpine'",
-        },
-        {
-            "name": "Create jadx directory",
-            "file": {"path": "#{BBOT_TOOLS}/jadx", "state": "directory", "mode": "0755"},
-        },
-        {
-            "name": "Download jadx",
-            "unarchive": {
-                "src": "https://github.com/skylot/jadx/releases/download/v1.5.0/jadx-1.5.0.zip",
-                "include": ["lib/jadx-1.5.0-all.jar", "bin/jadx"],
-                "dest": "#{BBOT_TOOLS}/jadx",
-                "remote_src": True,
-            },
-        },
-    ]
-
-    zipcompressed = ["doc", "dot", "docm", "docx", "ppt", "pptm", "pptx", "xls", "xlt", "xlsm", "xlsx", "zip"]
-    jadx = ["xapk", "apk"]
-    allowed_extensions = zipcompressed + jadx
+    # deps_pip = ["rarfile", "py7zr", "zstandard", "lz4"]
 
     async def setup(self):
-        self.threads = self.config.get("threads", 4)
+        self.compression_methods = {
+            "zip": self.extract_zip_file,
+            #     "bz2": lambda path, output_dir: self.extract_bz2_file(path, output_dir / "content.txt"),
+            #     "xz": lambda path, output_dir: self.extract_xz_file(path, output_dir / "content.txt"),
+            #     "7z": self.extract_7z_file,
+            #     "rar": self.extract_rar_file,
+            #     "lzma": lambda path, output_dir: self.extract_lzma_file(path, output_dir / "content.txt"),
+            #     "compress": lambda path, output_dir: self.extract_compress_file(path, output_dir / "content.txt"),
+            #     "zstd": lambda path, output_dir: self.extract_zstd_file(path, output_dir / "content.txt"),
+            #     "lz4": lambda path, output_dir: self.extract_lz4_file(path, output_dir / "content.txt"),
+            #     "tar": self.extract_tar_file,
+            #     "pak": self.extract_pak_file,
+            #     "lha": self.extract_lha_file,
+            #     "arj": self.extract_arj_file,
+            #     "cab": self.extract_cab_file,
+            #     "sit": self.extract_sit_file,
+            #     "binhex": lambda path, output_dir: self.extract_binhex_file(path, output_dir / "content.txt"),
+            #     "lrzip": lambda path, output_dir: self.extract_lrzip_file(path, output_dir / "content.txt"),
+            #     "alz": self.extract_alz_file,
+            #     "tgz": self.extract_tgz_file,
+            #     "gzip": lambda path, output_dir: self.extract_gzip_file(path, output_dir / "content.txt"),
+            #     "lzip": lambda path, output_dir: self.extract_lzip_file(path, output_dir / "content.txt"),
+            #     "palm": lambda path, output_dir: self.extract_palm_file(path, output_dir / "content.txt"),
+            #     "cpio": self.extract_cpio_file,
+            #     "pack200": lambda path, output_dir: self.extract_pack200_file(path, output_dir / "content.txt"),
+            #     "par2": lambda path, output_dir: self.extract_par2_file(path, output_dir / "content.txt"),
+            #     "ar": self.extract_ar_file,
+            #     "qpress": self.extract_qpress_file,
+            #     "xar": self.extract_xar_file,
+            #     "ace": self.extract_ace_file,
+            #     "zoo": self.extract_zoo_file,
+            #     "arc": self.extract_arc_file,
+        }
         return True
 
     async def filter_event(self, event):
         if "file" in event.tags:
-            if not any(event.data["path"].endswith(f".{ext}") for ext in self.allowed_extensions):
+            if not event.data["compression"] in self.compression_methods:
                 return False, "Extract unable to handle file type"
         else:
             return False, "Event is not a file"
         return True
 
     async def handle_event(self, event):
+        compression_format = event.data["compression"]
         path = Path(event.data["path"])
-        extension = path.suffix.strip(".").lower()
         output_dir = path.parent / path.name.replace(".", "_")
         self.helpers.mkdir(output_dir)
 
         # Use the appropriate extraction method based on the file type
         self.info(f"Extracting {path} to {output_dir}")
-        if extension in self.zipcompressed:
-            success = self.extract_zip_file(path, output_dir)
-        elif extension in self.jadx:
-            success = await self.decompile_apk(path, output_dir)
+        extract_method = self.compression_methods.get(compression_format)
+        success = extract_method(path, output_dir)
 
         # If the extraction was successful, emit the event
         if success:
@@ -110,22 +101,288 @@ def extract_zip_file(self, path, output_dir):
             return False
         return True
 
-    async def decompile_apk(self, path, output_dir):
-        command = [
-            f"{self.scan.helpers.tools_dir}/jadx/bin/jadx",
-            "--threads-count",
-            self.threads,
-            "--output-dir",
-            str(output_dir),
-            str(path),
-        ]
-        try:
-            output = await self.run_process(command, check=True)
-        except CalledProcessError as e:
-            self.warning(f"Error decompiling {path}. STDERR: {repr(e.stderr)}")
-            return False
-        if not Path(output_dir / "resources").exists() and not Path(output_dir / "sources").exists():
-            self.warning(f"JADX was unable to decompile {path}.")
-            self.warning(output)
-            return False
-        return True
+    # def extract_bz2_file(self, path, output_file):
+    #     try:
+    #         with bz2.BZ2File(path, "rb") as file:
+    #             content = file.read()
+    #             with open(output_file, "wb") as f:
+    #                 f.write(content)
+    #     except Exception as e:
+    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+    #         return False
+    #     return True
+
+
+#
+# def extract_xz_file(self, path, output_file):
+#     try:
+#         with lzma.open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_7z_file(self, path, output_dir):
+#     try:
+#         with py7zr.SevenZipFile(path, mode="r") as z:
+#             z.extractall(path=output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_rar_file(self, path, output_dir):
+#     try:
+#         with rarfile.RarFile(path, "r") as rar_ref:
+#             rar_ref.extractall(output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_lzma_file(self, path, output_file):
+#     try:
+#         with lzma.open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_compress_file(self, path, output_file):
+#     try:
+#         with open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_zstd_file(self, path, output_file):
+#     try:
+#         with open(path, "rb") as file:
+#             dctx = zstd.ZstdDecompressor()
+#             content = dctx.decompress(file.read())
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_lz4_file(self, path, output_file):
+#     try:
+#         with lz4.frame.open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_tar_file(self, path, output_dir):
+#     try:
+#         with tarfile.open(path, "r") as tar_ref:
+#             tar_ref.extractall(output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_pak_file(self, path, output_dir):
+#     try:
+#         expak.extract_resources(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_lha_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_arj_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_cab_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_sit_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_binhex_file(self, path, output_file):
+#     try:
+#         with open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_lrzip_file(self, path, output_file):
+#     try:
+#         with open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_alz_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_tgz_file(self, path, output_dir):
+#     try:
+#         with tarfile.open(path, "r:gz") as tar_ref:
+#             tar_ref.extractall(output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_gzip_file(self, path, output_file):
+#     try:
+#         with open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_lzip_file(self, path, output_file):
+#     try:
+#         with open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_palm_file(self, path, output_file):
+#     try:
+#         with open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_cpio_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_pack200_file(self, path, output_file):
+#     try:
+#         with open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_par2_file(self, path, output_file):
+#     try:
+#         with open(path, "rb") as file:
+#             content = file.read()
+#             with open(output_file, "wb") as f:
+#                 f.write(content)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_ar_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_qpress_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_xar_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_ace_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_zoo_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
+#
+# def extract_arc_file(self, path, output_dir):
+#     try:
+#         shutil.unpack_archive(path, output_dir)
+#     except Exception as e:
+#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+#         return False
+#     return True
diff --git a/bbot/test/test_step_2/module_tests/test_module_extract.py b/bbot/test/test_step_2/module_tests/test_module_extract.py
index 15eedbc03e..62baf47e9a 100644
--- a/bbot/test/test_step_2/module_tests/test_module_extract.py
+++ b/bbot/test/test_step_2/module_tests/test_module_extract.py
@@ -1,7 +1,7 @@
 import zipfile
 
 from pathlib import Path
-from .base import ModuleTestBase, tempapkfile
+from .base import ModuleTestBase
 
 
 class TestExtractZip(ModuleTestBase):
@@ -39,51 +39,3 @@ def check(self, module_test, events):
         assert 1 == len(extract_event), "Failed to extract zip"
         extract_path = Path(extract_event[0].data["path"])
         assert extract_path.is_dir(), "Destination folder doesn't exist"
-
-
-class TestExtractApk(ModuleTestBase):
-    modules_overrides = ["apkpure", "google_playstore", "speculate", "extract"]
-    apk_file = tempapkfile()
-
-    async def setup_after_prep(self, module_test):
-        await module_test.mock_dns({"blacklanternsecurity.com": {"A": ["127.0.0.99"]}})
-        module_test.httpx_mock.add_response(
-            url="https://play.google.com/store/search?q=blacklanternsecurity&c=apps",
-            text="""<!DOCTYPE html>
-            <html>
-            <head>
-            <title>"blacklanternsecurity" - Android Apps on Google Play</title>
-            </head>
-            <body>
-            <a href="/store/apps/details?id=com.bbot.test&pcampaignid=dontmatchme&pli=1"/>
-            </body>
-            </html>""",
-        )
-        module_test.httpx_mock.add_response(
-            url="https://play.google.com/store/apps/details?id=com.bbot.test",
-            text="""<!DOCTYPE html>
-            <html>
-            <head>
-            <title>BBOT</title>
-            </head>
-            <body>
-            <meta name="appstore:developer_url" content="https://www.blacklanternsecurity.com">
-            </div>
-            </div>
-            </body>
-            </html>""",
-        )
-        module_test.httpx_mock.add_response(
-            url="https://d.apkpure.com/b/XAPK/com.bbot.test?version=latest",
-            content=self.apk_file,
-        )
-
-    def check(self, module_test, events):
-        extract_event = [
-            e
-            for e in events
-            if e.type == "FILESYSTEM" and "com_bbot_test_xapk" in e.data["path"] and "folder" in e.tags
-        ]
-        assert 1 == len(extract_event), "Failed to extract apk"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination apk doesn't exist"

From 9536b579749054d2173f8db634e3594f2c73ec9a Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Thu, 21 Nov 2024 18:36:41 +0000
Subject: [PATCH 06/29] Added more compression formats to extract

---
 bbot/modules/filedownload.py                  |   7 +
 bbot/modules/internal/extract.py              | 378 +++++++++---------
 .../module_tests/test_module_extract.py       | 239 ++++++++++-
 3 files changed, 427 insertions(+), 197 deletions(-)

diff --git a/bbot/modules/filedownload.py b/bbot/modules/filedownload.py
index 872a447a1f..34d616a894 100644
--- a/bbot/modules/filedownload.py
+++ b/bbot/modules/filedownload.py
@@ -63,6 +63,7 @@ class filedownload(BaseModule):
             "swp",  #  Swap File (temporary file, often Vim)
             "sxw",  #  OpenOffice.org Writer document
             "tar.gz",  # Gzip-Compressed Tar Archive
+            "tgz",  #  Gzip-Compressed Tar Archive
             "tar",  #  Tar Archive
             "txt",  #  Plain Text Document
             "vbs",  #  Visual Basic Script
@@ -74,6 +75,12 @@ class filedownload(BaseModule):
             "yaml",  #  YAML Ain't Markup Language
             "yml",  #  YAML Ain't Markup Language
             "zip",  #  Zip Archive
+            "bz2",  #  Bzip2 Compressed File
+            "xz",  #  XZ Compressed File
+            "7z",  #  7-Zip Compressed File
+            "lzma",  #  LZMA Compressed File
+            "zst",  #  Zstandard Compressed File
+            "lz4",  #  LZ4 Compressed File
         ],
         "max_filesize": "10MB",
         "base_64_encoded_file": "false",
diff --git a/bbot/modules/internal/extract.py b/bbot/modules/internal/extract.py
index 37ea82d546..7876ed1189 100644
--- a/bbot/modules/internal/extract.py
+++ b/bbot/modules/internal/extract.py
@@ -1,13 +1,16 @@
 import zipfile
 
-# import bz2
-# import lzma
+import bz2
+import lzma
+
 # import expak
-# import tarfile
+import tarfile
+
 # import rarfile
-# import py7zr
-# import zstandard as zstd
-# import lz4.frame
+import py7zr
+import zstandard as zstd
+import lz4.frame
+
 # import shutil
 
 from pathlib import Path
@@ -28,15 +31,15 @@ class extract(BaseInternalModule):
     async def setup(self):
         self.compression_methods = {
             "zip": self.extract_zip_file,
-            #     "bz2": lambda path, output_dir: self.extract_bz2_file(path, output_dir / "content.txt"),
-            #     "xz": lambda path, output_dir: self.extract_xz_file(path, output_dir / "content.txt"),
-            #     "7z": self.extract_7z_file,
-            #     "rar": self.extract_rar_file,
-            #     "lzma": lambda path, output_dir: self.extract_lzma_file(path, output_dir / "content.txt"),
+            "bzip2": lambda path, output_dir: self.extract_bz2_file(path, output_dir / "content.txt"),
+            "xz": lambda path, output_dir: self.extract_xz_file(path, output_dir / "content.txt"),
+            "7z": self.extract_7z_file,
+            # "rar": self.extract_rar_file,
+            "lzma": lambda path, output_dir: self.extract_lzma_file(path, output_dir / "content.txt"),
             #     "compress": lambda path, output_dir: self.extract_compress_file(path, output_dir / "content.txt"),
-            #     "zstd": lambda path, output_dir: self.extract_zstd_file(path, output_dir / "content.txt"),
-            #     "lz4": lambda path, output_dir: self.extract_lz4_file(path, output_dir / "content.txt"),
-            #     "tar": self.extract_tar_file,
+            "zstd": lambda path, output_dir: self.extract_zstd_file(path, output_dir / "content.txt"),
+            "lz4": lambda path, output_dir: self.extract_lz4_file(path, output_dir / "content.txt"),
+            "tar": self.extract_tar_file,
             #     "pak": self.extract_pak_file,
             #     "lha": self.extract_lha_file,
             #     "arj": self.extract_arj_file,
@@ -45,8 +48,8 @@ async def setup(self):
             #     "binhex": lambda path, output_dir: self.extract_binhex_file(path, output_dir / "content.txt"),
             #     "lrzip": lambda path, output_dir: self.extract_lrzip_file(path, output_dir / "content.txt"),
             #     "alz": self.extract_alz_file,
-            #     "tgz": self.extract_tgz_file,
-            #     "gzip": lambda path, output_dir: self.extract_gzip_file(path, output_dir / "content.txt"),
+            "tgz": self.extract_gzip_file,
+            "gzip": self.extract_gzip_file,
             #     "lzip": lambda path, output_dir: self.extract_lzip_file(path, output_dir / "content.txt"),
             #     "palm": lambda path, output_dir: self.extract_palm_file(path, output_dir / "content.txt"),
             #     "cpio": self.extract_cpio_file,
@@ -64,7 +67,7 @@ async def setup(self):
     async def filter_event(self, event):
         if "file" in event.tags:
             if not event.data["compression"] in self.compression_methods:
-                return False, "Extract unable to handle file type"
+                return False, f"Extract unable to handle file type: {event.data['compression']}, {event.data['path']}"
         else:
             return False, "Event is not a file"
         return True
@@ -101,9 +104,61 @@ def extract_zip_file(self, path, output_dir):
             return False
         return True
 
-    # def extract_bz2_file(self, path, output_file):
+    def extract_bz2_file(self, path, output_file):
+        try:
+            with bz2.BZ2File(path, "rb") as file:
+                content = file.read()
+                with open(output_file, "wb") as f:
+                    f.write(content)
+        except Exception as e:
+            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+            return False
+        return True
+
+    def extract_xz_file(self, path, output_file):
+        try:
+            with lzma.open(path, "rb") as file:
+                content = file.read()
+                with open(output_file, "wb") as f:
+                    f.write(content)
+        except Exception as e:
+            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+            return False
+        return True
+
+    def extract_7z_file(self, path, output_dir):
+        try:
+            with py7zr.SevenZipFile(path, mode="r") as z:
+                z.extractall(path=output_dir)
+        except Exception as e:
+            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+            return False
+        return True
+
+    # def extract_rar_file(self, path, output_dir):
     #     try:
-    #         with bz2.BZ2File(path, "rb") as file:
+    #         with rarfile.RarFile(path, "r") as rar_ref:
+    #             rar_ref.extractall(output_dir)
+    #     except Exception as e:
+    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+    #         return False
+    #     return True
+
+    def extract_lzma_file(self, path, output_file):
+        try:
+            with lzma.open(path, "rb") as file:
+                content = file.read()
+                with open(output_file, "wb") as f:
+                    f.write(content)
+        except Exception as e:
+            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+            return False
+        return True
+
+    #
+    # def extract_compress_file(self, path, output_file):
+    #     try:
+    #         with open(path, "rb") as file:
     #             content = file.read()
     #             with open(output_file, "wb") as f:
     #                 f.write(content)
@@ -111,181 +166,120 @@ def extract_zip_file(self, path, output_dir):
     #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
     #         return False
     #     return True
+    #
+    def extract_zstd_file(self, path, output_file):
+        try:
+            with open(path, "rb") as file:
+                dctx = zstd.ZstdDecompressor()
+                content = dctx.decompress(file.read())
+                with open(output_file, "wb") as f:
+                    f.write(content)
+        except Exception as e:
+            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+            return False
+        return True
+
+    def extract_lz4_file(self, path, output_file):
+        try:
+            with lz4.frame.open(path, "rb") as file:
+                content = file.read()
+                with open(output_file, "wb") as f:
+                    f.write(content)
+        except Exception as e:
+            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+            return False
+        return True
+
+    def extract_tar_file(self, path, output_dir):
+        try:
+            with tarfile.open(path, "r") as tar_ref:
+                tar_ref.extractall(output_dir)
+        except Exception as e:
+            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+            return False
+        return True
+
+    #
+    # def extract_pak_file(self, path, output_dir):
+    #     try:
+    #         expak.extract_resources(path, output_dir)
+    #     except Exception as e:
+    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+    #         return False
+    #     return True
+    #
+    # def extract_lha_file(self, path, output_dir):
+    #     try:
+    #         shutil.unpack_archive(path, output_dir)
+    #     except Exception as e:
+    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+    #         return False
+    #     return True
+    #
+    # def extract_arj_file(self, path, output_dir):
+    #     try:
+    #         shutil.unpack_archive(path, output_dir)
+    #     except Exception as e:
+    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+    #         return False
+    #     return True
+    #
+    # def extract_cab_file(self, path, output_dir):
+    #     try:
+    #         shutil.unpack_archive(path, output_dir)
+    #     except Exception as e:
+    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+    #         return False
+    #     return True
+    #
+    # def extract_sit_file(self, path, output_dir):
+    #     try:
+    #         shutil.unpack_archive(path, output_dir)
+    #     except Exception as e:
+    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+    #         return False
+    #     return True
+    #
+    # def extract_binhex_file(self, path, output_file):
+    #     try:
+    #         with open(path, "rb") as file:
+    #             content = file.read()
+    #             with open(output_file, "wb") as f:
+    #                 f.write(content)
+    #     except Exception as e:
+    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+    #         return False
+    #     return True
+    #
+    # def extract_lrzip_file(self, path, output_file):
+    #     try:
+    #         with open(path, "rb") as file:
+    #             content = file.read()
+    #             with open(output_file, "wb") as f:
+    #                 f.write(content)
+    #     except Exception as e:
+    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+    #         return False
+    #     return True
+    #
+    # def extract_alz_file(self, path, output_dir):
+    #     try:
+    #         shutil.unpack_archive(path, output_dir)
+    #     except Exception as e:
+    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+    #         return False
+    #     return True
+    #
+    def extract_gzip_file(self, path, output_dir):
+        try:
+            with tarfile.open(path, "r:gz") as tar_ref:
+                tar_ref.extractall(output_dir)
+        except Exception as e:
+            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
+            return False
+        return True
 
 
-#
-# def extract_xz_file(self, path, output_file):
-#     try:
-#         with lzma.open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_7z_file(self, path, output_dir):
-#     try:
-#         with py7zr.SevenZipFile(path, mode="r") as z:
-#             z.extractall(path=output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_rar_file(self, path, output_dir):
-#     try:
-#         with rarfile.RarFile(path, "r") as rar_ref:
-#             rar_ref.extractall(output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_lzma_file(self, path, output_file):
-#     try:
-#         with lzma.open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_compress_file(self, path, output_file):
-#     try:
-#         with open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_zstd_file(self, path, output_file):
-#     try:
-#         with open(path, "rb") as file:
-#             dctx = zstd.ZstdDecompressor()
-#             content = dctx.decompress(file.read())
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_lz4_file(self, path, output_file):
-#     try:
-#         with lz4.frame.open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_tar_file(self, path, output_dir):
-#     try:
-#         with tarfile.open(path, "r") as tar_ref:
-#             tar_ref.extractall(output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_pak_file(self, path, output_dir):
-#     try:
-#         expak.extract_resources(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_lha_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_arj_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_cab_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_sit_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_binhex_file(self, path, output_file):
-#     try:
-#         with open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_lrzip_file(self, path, output_file):
-#     try:
-#         with open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_alz_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_tgz_file(self, path, output_dir):
-#     try:
-#         with tarfile.open(path, "r:gz") as tar_ref:
-#             tar_ref.extractall(output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_gzip_file(self, path, output_file):
-#     try:
-#         with open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
 #
 # def extract_lzip_file(self, path, output_file):
 #     try:
diff --git a/bbot/test/test_step_2/module_tests/test_module_extract.py b/bbot/test/test_step_2/module_tests/test_module_extract.py
index 62baf47e9a..a1d43d82a7 100644
--- a/bbot/test/test_step_2/module_tests/test_module_extract.py
+++ b/bbot/test/test_step_2/module_tests/test_module_extract.py
@@ -1,23 +1,98 @@
 import zipfile
+import bz2
+import lzma
+import tarfile
+
+# import rarfile
+import py7zr
+import zstandard as zstd
+import lz4.frame
 
 from pathlib import Path
 from .base import ModuleTestBase
 
 
-class TestExtractZip(ModuleTestBase):
+class TestExtract(ModuleTestBase):
     targets = ["http://127.0.0.1:8888"]
     modules_overrides = ["filedownload", "httpx", "excavate", "speculate", "extract"]
-
     temp_path = Path("/tmp/.bbot_test")
+
+    # Create a text file to compress
+    text_file = temp_path / "test.txt"
+    with open(text_file, "w") as f:
+        f.write("This is a test file")
+
+    # ZIP
     zip_file = temp_path / "test.zip"
     with zipfile.ZipFile(zip_file, "w") as z:
-        z.writestr("test.txt", "This is a test file")
+        z.write(text_file, "test.txt")
+
+    # BZ2
+    bz2_file = temp_path / "test.bz2"
+    with bz2.BZ2File(bz2_file, "wb") as b:
+        with open(text_file, "rb") as f:
+            b.write(f.read())
+
+    # XZ
+    xz_file = temp_path / "test.xz"
+    with lzma.open(xz_file, "wb") as x:
+        with open(text_file, "rb") as f:
+            x.write(f.read())
+
+    # 7Z
+    seven_z_file = temp_path / "test.7z"
+    with py7zr.SevenZipFile(seven_z_file, "w") as z:
+        z.write(text_file, "test.txt")
+
+    # RAR
+    # rar_file = temp_path / "test.rar"
+    # with rarfile.RarFile(rar_file, "w") as r:
+    #     r.write(text_file, "test.txt")
+
+    # LZMA
+    lzma_file = temp_path / "test.lzma"
+    with lzma.open(lzma_file, "wb") as l:
+        with open(text_file, "rb") as f:
+            l.write(f.read())
+
+    # TAR
+    tar_file = temp_path / "test.tar"
+    with tarfile.open(tar_file, "w") as t:
+        t.add(text_file, arcname="test.txt")
+
+    # ZSTD
+    zstd_file = temp_path / "test.zst"
+    with open(text_file, "rb") as f:
+        content = f.read()
+        with open(zstd_file, "wb") as z:
+            cctx = zstd.ZstdCompressor()
+            z.write(cctx.compress(content))
+
+    # LZ4
+    lz4_file = temp_path / "test.lz4"
+    with open(text_file, "rb") as f:
+        content = f.read()
+        with lz4.frame.open(lz4_file, "wb") as l:
+            l.write(content)
+
+    # TAR.GZ
+    tgz_file = temp_path / "test.tgz"
+    with tarfile.open(tgz_file, "w:gz") as t:
+        t.add(text_file, arcname="test.txt")
 
     async def setup_after_prep(self, module_test):
         module_test.set_expect_requests(
             dict(uri="/"),
             dict(
-                response_data='<a href="/test.zip"/>',
+                response_data="""<a href="/test.zip"/>
+                <a href="/test.bz2"/>
+                <a href="/test.xz"/>
+                <a href="/test.7z"/>
+                <a href="/test.lzma"/>
+                <a href="/test.tar"/>
+                <a href="/test.zst"/>
+                <a href="/test.lz4"/>
+                <a href="/test.tgz"/>"""
             ),
         )
         module_test.set_expect_requests(
@@ -26,11 +101,75 @@ async def setup_after_prep(self, module_test):
                 response_data=self.zip_file.read_bytes(),
                 headers={"Content-Type": "application/zip"},
             ),
-        )
+        ),
+        module_test.set_expect_requests(
+            dict(uri="/test.bz2"),
+            dict(
+                response_data=self.bz2_file.read_bytes(),
+                headers={"Content-Type": "application/x-bzip2"},
+            ),
+        ),
+        module_test.set_expect_requests(
+            dict(uri="/test.xz"),
+            dict(
+                response_data=self.xz_file.read_bytes(),
+                headers={"Content-Type": "application/x-xz"},
+            ),
+        ),
+        module_test.set_expect_requests(
+            dict(uri="/test.7z"),
+            dict(
+                response_data=self.seven_z_file.read_bytes(),
+                headers={"Content-Type": "application/x-7z-compressed"},
+            ),
+        ),
+        # module_test.set_expect_requests(
+        #     dict(uri="/test.rar"),
+        #     dict(
+        #         response_data=self.rar_file.read_bytes(),
+        #         headers={"Content-Type": "application/vnd.rar"},
+        #     ),
+        # ),
+        module_test.set_expect_requests(
+            dict(uri="/test.lzma"),
+            dict(
+                response_data=self.lzma_file.read_bytes(),
+                headers={"Content-Type": "application/x-lzma"},
+            ),
+        ),
+        module_test.set_expect_requests(
+            dict(uri="/test.zst"),
+            dict(
+                response_data=self.zstd_file.read_bytes(),
+                headers={"Content-Type": "application/zstd"},
+            ),
+        ),
+        module_test.set_expect_requests(
+            dict(uri="/test.lz4"),
+            dict(
+                response_data=self.lz4_file.read_bytes(),
+                headers={"Content-Type": "application/x-lz4"},
+            ),
+        ),
+        module_test.set_expect_requests(
+            dict(uri="/test.tar"),
+            dict(
+                response_data=self.tar_file.read_bytes(),
+                headers={"Content-Type": "application/x-tar"},
+            ),
+        ),
+        module_test.set_expect_requests(
+            dict(uri="/test.tgz"),
+            dict(
+                response_data=self.tgz_file.read_bytes(),
+                headers={"Content-Type": "application/x-tgz"},
+            ),
+        ),
 
     def check(self, module_test, events):
         filesystem_events = [e for e in events if e.type == "FILESYSTEM"]
 
+        # ZIP
         zip_file_event = [e for e in filesystem_events if "test.zip" in e.data["path"]]
         assert 1 == len(zip_file_event), "No zip file found"
         file = Path(zip_file_event[0].data["path"])
@@ -39,3 +178,93 @@ def check(self, module_test, events):
         assert 1 == len(extract_event), "Failed to extract zip"
         extract_path = Path(extract_event[0].data["path"])
         assert extract_path.is_dir(), "Destination folder doesn't exist"
+
+        # BZ2
+        bz2_file_event = [e for e in filesystem_events if "test.bz2" in e.data["path"]]
+        assert 1 == len(bz2_file_event), "No bz2 file found"
+        file = Path(bz2_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test_bz2" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract bz2"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination folder doesn't exist"
+
+        # XZ
+        xz_file_event = [e for e in filesystem_events if "test.xz" in e.data["path"]]
+        assert 1 == len(xz_file_event), "No xz file found"
+        file = Path(xz_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test_xz" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract xz"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination folder doesn't exist"
+
+        # 7Z
+        seven_z_file_event = [e for e in filesystem_events if "test.7z" in e.data["path"]]
+        assert 1 == len(seven_z_file_event), "No 7z file found"
+        file = Path(seven_z_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test_7z" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract 7z"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination folder doesn't exist"
+
+        # RAR
+        # rar_file_event = [e for e in filesystem_events if "test.rar" in e.data["path"]]
+        # assert 1 == len(rar_file_event), "No rar file found"
+        # file = Path(rar_file_event[0].data["path"])
+        # assert file.is_file(), f"File not found at {file}"
+        # extract_event = [e for e in filesystem_events if "test_rar" in e.data["path"] and "folder" in e.tags]
+        # assert 1 == len(extract_event), "Failed to extract rar"
+        # extract_path = Path(extract_event[0].data["path"])
+        # assert extract_path.is_dir(), "Destination folder doesn't exist"
+
+        # LZMA
+        lzma_file_event = [e for e in filesystem_events if "test.lzma" in e.data["path"]]
+        assert 1 == len(lzma_file_event), "No lzma file found"
+        file = Path(lzma_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test_lzma" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract lzma"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination folder doesn't exist"
+
+        # ZSTD
+        zstd_file_event = [e for e in filesystem_events if "test.zst" in e.data["path"]]
+        assert 1 == len(zstd_file_event), "No zstd file found"
+        file = Path(zstd_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test_zst" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract zstd"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination folder doesn't exist"
+
+        # LZ4
+        lz4_file_event = [e for e in filesystem_events if "test.lz4" in e.data["path"]]
+        assert 1 == len(lz4_file_event), "No lz4 file found"
+        file = Path(lz4_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test_lz4" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract lz4"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination folder doesn't exist"
+
+        # TAR
+        tar_file_event = [e for e in filesystem_events if "test.tar" in e.data["path"]]
+        assert 1 == len(tar_file_event), "No tar file found"
+        file = Path(tar_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test_tar" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract tar"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination folder doesn't exist"
+
+        # TAR.GZ
+        tgz_file_event = [e for e in filesystem_events if "test.tgz" in e.data["path"]]
+        assert 1 == len(tgz_file_event), "No tgz file found"
+        file = Path(tgz_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test_tgz" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract tgz"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination folder doesn't exist"

From 3a68fbc1ce961ff72858ad20271fc756722dd9a4 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Thu, 21 Nov 2024 18:56:34 +0000
Subject: [PATCH 07/29] Add imports

---
 bbot/modules/internal/extract.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bbot/modules/internal/extract.py b/bbot/modules/internal/extract.py
index 7876ed1189..b2cfcd621c 100644
--- a/bbot/modules/internal/extract.py
+++ b/bbot/modules/internal/extract.py
@@ -26,7 +26,7 @@ class extract(BaseInternalModule):
         "created_date": "2024-11-04",
         "author": "@domwhewell-sage",
     }
-    # deps_pip = ["rarfile", "py7zr", "zstandard", "lz4"]
+    deps_pip = ["py7zr", "zstandard", "lz4"]
 
     async def setup(self):
         self.compression_methods = {

From d935444ead814d66e8d12a04b052e29efa9540d8 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Mon, 2 Dec 2024 17:52:58 +0000
Subject: [PATCH 08/29] Change to use OS commands

---
 bbot/modules/internal/extract.py              | 367 ++----------------
 .../module_tests/test_module_extract.py       |  64 +--
 2 files changed, 50 insertions(+), 381 deletions(-)

diff --git a/bbot/modules/internal/extract.py b/bbot/modules/internal/extract.py
index b2cfcd621c..af08f940f6 100644
--- a/bbot/modules/internal/extract.py
+++ b/bbot/modules/internal/extract.py
@@ -1,20 +1,6 @@
-import zipfile
-
-import bz2
-import lzma
-
-# import expak
-import tarfile
-
-# import rarfile
-import py7zr
-import zstandard as zstd
-import lz4.frame
-
-# import shutil
-
 from pathlib import Path
 from bbot.modules.internal.base import BaseInternalModule
+from bbot.core.helpers.libmagic import get_magic_info, get_compression
 
 
 class extract(BaseInternalModule):
@@ -26,41 +12,20 @@ class extract(BaseInternalModule):
         "created_date": "2024-11-04",
         "author": "@domwhewell-sage",
     }
-    deps_pip = ["py7zr", "zstandard", "lz4"]
+    deps_apt = ["7zip", "tar", "unrar", "gunzip", "zstd", "lz4"]
 
     async def setup(self):
         self.compression_methods = {
-            "zip": self.extract_zip_file,
-            "bzip2": lambda path, output_dir: self.extract_bz2_file(path, output_dir / "content.txt"),
-            "xz": lambda path, output_dir: self.extract_xz_file(path, output_dir / "content.txt"),
-            "7z": self.extract_7z_file,
-            # "rar": self.extract_rar_file,
-            "lzma": lambda path, output_dir: self.extract_lzma_file(path, output_dir / "content.txt"),
-            #     "compress": lambda path, output_dir: self.extract_compress_file(path, output_dir / "content.txt"),
-            "zstd": lambda path, output_dir: self.extract_zstd_file(path, output_dir / "content.txt"),
-            "lz4": lambda path, output_dir: self.extract_lz4_file(path, output_dir / "content.txt"),
-            "tar": self.extract_tar_file,
-            #     "pak": self.extract_pak_file,
-            #     "lha": self.extract_lha_file,
-            #     "arj": self.extract_arj_file,
-            #     "cab": self.extract_cab_file,
-            #     "sit": self.extract_sit_file,
-            #     "binhex": lambda path, output_dir: self.extract_binhex_file(path, output_dir / "content.txt"),
-            #     "lrzip": lambda path, output_dir: self.extract_lrzip_file(path, output_dir / "content.txt"),
-            #     "alz": self.extract_alz_file,
-            "tgz": self.extract_gzip_file,
-            "gzip": self.extract_gzip_file,
-            #     "lzip": lambda path, output_dir: self.extract_lzip_file(path, output_dir / "content.txt"),
-            #     "palm": lambda path, output_dir: self.extract_palm_file(path, output_dir / "content.txt"),
-            #     "cpio": self.extract_cpio_file,
-            #     "pack200": lambda path, output_dir: self.extract_pack200_file(path, output_dir / "content.txt"),
-            #     "par2": lambda path, output_dir: self.extract_par2_file(path, output_dir / "content.txt"),
-            #     "ar": self.extract_ar_file,
-            #     "qpress": self.extract_qpress_file,
-            #     "xar": self.extract_xar_file,
-            #     "ace": self.extract_ace_file,
-            #     "zoo": self.extract_zoo_file,
-            #     "arc": self.extract_arc_file,
+            "zip": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
+            "bzip2": ["tar", "--overwrite", "-xvjf", "{filename}", "-C", "{extract_dir}/"],
+            "xz": ["tar", "--overwrite", "-xvJf", "{filename}", "-C", "{extract_dir}/"],
+            "7z": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
+            "rar": ["unrar", "x", "-o+", "-p-", "{filename}", "{extract_dir}/"],
+            "lzma": ["tar", "--overwrite", "--lzma", "-xvf", "{filename}", "-C", "{extract_dir}/"],
+            "lz4": ["lz4", "-d", "--force", "{filename}", "{extract_dir}/"],
+            "tar": ["tar", "--overwrite", "-xvf", "{filename}", "-C", "{extract_dir}/"],
+            "tgz": ["tar", "--overwrite", "-xvzf", "{filename}", "-C", "{extract_dir}/"],
+            "gzip": ["gunzip", "--force", "--keep", "{filename}"],
         }
         return True
 
@@ -73,15 +38,12 @@ async def filter_event(self, event):
         return True
 
     async def handle_event(self, event):
-        compression_format = event.data["compression"]
         path = Path(event.data["path"])
         output_dir = path.parent / path.name.replace(".", "_")
-        self.helpers.mkdir(output_dir)
 
         # Use the appropriate extraction method based on the file type
         self.info(f"Extracting {path} to {output_dir}")
-        extract_method = self.compression_methods.get(compression_format)
-        success = extract_method(path, output_dir)
+        success = await self.extract_file(path, output_dir)
 
         # If the extraction was successful, emit the event
         if success:
@@ -95,288 +57,21 @@ async def handle_event(self, event):
         else:
             output_dir.rmdir()
 
-    def extract_zip_file(self, path, output_dir):
-        try:
-            with zipfile.ZipFile(path, "r") as zip_ref:
-                zip_ref.extractall(output_dir)
-        except Exception as e:
-            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-            return False
-        return True
-
-    def extract_bz2_file(self, path, output_file):
-        try:
-            with bz2.BZ2File(path, "rb") as file:
-                content = file.read()
-                with open(output_file, "wb") as f:
-                    f.write(content)
-        except Exception as e:
-            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-            return False
-        return True
-
-    def extract_xz_file(self, path, output_file):
-        try:
-            with lzma.open(path, "rb") as file:
-                content = file.read()
-                with open(output_file, "wb") as f:
-                    f.write(content)
-        except Exception as e:
-            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-            return False
-        return True
-
-    def extract_7z_file(self, path, output_dir):
-        try:
-            with py7zr.SevenZipFile(path, mode="r") as z:
-                z.extractall(path=output_dir)
-        except Exception as e:
-            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-            return False
-        return True
-
-    # def extract_rar_file(self, path, output_dir):
-    #     try:
-    #         with rarfile.RarFile(path, "r") as rar_ref:
-    #             rar_ref.extractall(output_dir)
-    #     except Exception as e:
-    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-    #         return False
-    #     return True
-
-    def extract_lzma_file(self, path, output_file):
-        try:
-            with lzma.open(path, "rb") as file:
-                content = file.read()
-                with open(output_file, "wb") as f:
-                    f.write(content)
-        except Exception as e:
-            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-            return False
-        return True
-
-    #
-    # def extract_compress_file(self, path, output_file):
-    #     try:
-    #         with open(path, "rb") as file:
-    #             content = file.read()
-    #             with open(output_file, "wb") as f:
-    #                 f.write(content)
-    #     except Exception as e:
-    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-    #         return False
-    #     return True
-    #
-    def extract_zstd_file(self, path, output_file):
-        try:
-            with open(path, "rb") as file:
-                dctx = zstd.ZstdDecompressor()
-                content = dctx.decompress(file.read())
-                with open(output_file, "wb") as f:
-                    f.write(content)
-        except Exception as e:
-            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-            return False
-        return True
-
-    def extract_lz4_file(self, path, output_file):
-        try:
-            with lz4.frame.open(path, "rb") as file:
-                content = file.read()
-                with open(output_file, "wb") as f:
-                    f.write(content)
-        except Exception as e:
-            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-            return False
-        return True
-
-    def extract_tar_file(self, path, output_dir):
-        try:
-            with tarfile.open(path, "r") as tar_ref:
-                tar_ref.extractall(output_dir)
-        except Exception as e:
-            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-            return False
-        return True
-
-    #
-    # def extract_pak_file(self, path, output_dir):
-    #     try:
-    #         expak.extract_resources(path, output_dir)
-    #     except Exception as e:
-    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-    #         return False
-    #     return True
-    #
-    # def extract_lha_file(self, path, output_dir):
-    #     try:
-    #         shutil.unpack_archive(path, output_dir)
-    #     except Exception as e:
-    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-    #         return False
-    #     return True
-    #
-    # def extract_arj_file(self, path, output_dir):
-    #     try:
-    #         shutil.unpack_archive(path, output_dir)
-    #     except Exception as e:
-    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-    #         return False
-    #     return True
-    #
-    # def extract_cab_file(self, path, output_dir):
-    #     try:
-    #         shutil.unpack_archive(path, output_dir)
-    #     except Exception as e:
-    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-    #         return False
-    #     return True
-    #
-    # def extract_sit_file(self, path, output_dir):
-    #     try:
-    #         shutil.unpack_archive(path, output_dir)
-    #     except Exception as e:
-    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-    #         return False
-    #     return True
-    #
-    # def extract_binhex_file(self, path, output_file):
-    #     try:
-    #         with open(path, "rb") as file:
-    #             content = file.read()
-    #             with open(output_file, "wb") as f:
-    #                 f.write(content)
-    #     except Exception as e:
-    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-    #         return False
-    #     return True
-    #
-    # def extract_lrzip_file(self, path, output_file):
-    #     try:
-    #         with open(path, "rb") as file:
-    #             content = file.read()
-    #             with open(output_file, "wb") as f:
-    #                 f.write(content)
-    #     except Exception as e:
-    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-    #         return False
-    #     return True
-    #
-    # def extract_alz_file(self, path, output_dir):
-    #     try:
-    #         shutil.unpack_archive(path, output_dir)
-    #     except Exception as e:
-    #         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-    #         return False
-    #     return True
-    #
-    def extract_gzip_file(self, path, output_dir):
-        try:
-            with tarfile.open(path, "r:gz") as tar_ref:
-                tar_ref.extractall(output_dir)
-        except Exception as e:
-            self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-            return False
-        return True
-
-
-#
-# def extract_lzip_file(self, path, output_file):
-#     try:
-#         with open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_palm_file(self, path, output_file):
-#     try:
-#         with open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_cpio_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_pack200_file(self, path, output_file):
-#     try:
-#         with open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_par2_file(self, path, output_file):
-#     try:
-#         with open(path, "rb") as file:
-#             content = file.read()
-#             with open(output_file, "wb") as f:
-#                 f.write(content)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_ar_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_qpress_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_xar_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_ace_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_zoo_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
-#
-# def extract_arc_file(self, path, output_dir):
-#     try:
-#         shutil.unpack_archive(path, output_dir)
-#     except Exception as e:
-#         self.warning(f"Error extracting {path}. Exception: {repr(e)}")
-#         return False
-#     return True
+    async def extract_file(self, path, output_dir):
+        if not output_dir.exists():
+            self.helpers.mkdir(output_dir)
+        extension, mime_type, description, confidence = get_magic_info(path)
+        compression_format = get_compression(mime_type)
+        cmd_list = self.compression_methods.get(compression_format, [])
+        if cmd_list:
+            command = [s.format(filename=path, extract_dir=output_dir) for s in cmd_list]
+            try:
+                output = await self.run_process(command, check=True)
+                for item in output_dir.iterdir():
+                    if item.is_file():
+                        await self.extract_file(item, output_dir / item.stem)
+            except Exception as e:
+                self.warning(f"Error extracting {path}. Error: {e}")
+                self.warning(output)
+                return False
+            return True
diff --git a/bbot/test/test_step_2/module_tests/test_module_extract.py b/bbot/test/test_step_2/module_tests/test_module_extract.py
index a1d43d82a7..e76b5fdb14 100644
--- a/bbot/test/test_step_2/module_tests/test_module_extract.py
+++ b/bbot/test/test_step_2/module_tests/test_module_extract.py
@@ -3,9 +3,8 @@
 import lzma
 import tarfile
 
-# import rarfile
 import py7zr
-import zstandard as zstd
+from librar import archive
 import lz4.frame
 
 from pathlib import Path
@@ -45,9 +44,9 @@ class TestExtract(ModuleTestBase):
         z.write(text_file, "test.txt")
 
     # RAR
-    # rar_file = temp_path / "test.rar"
-    # with rarfile.RarFile(rar_file, "w") as r:
-    #     r.write(text_file, "test.txt")
+    rar_file = temp_path / "test.rar"
+    with archive.Archive(rar_file, base) as r:
+        r.write(text_file, "test.txt")
 
     # LZMA
     lzma_file = temp_path / "test.lzma"
@@ -60,14 +59,6 @@ class TestExtract(ModuleTestBase):
     with tarfile.open(tar_file, "w") as t:
         t.add(text_file, arcname="test.txt")
 
-    # ZSTD
-    zstd_file = temp_path / "test.zst"
-    with open(text_file, "rb") as f:
-        content = f.read()
-        with open(zstd_file, "wb") as z:
-            cctx = zstd.ZstdCompressor()
-            z.write(cctx.compress(content))
-
     # LZ4
     lz4_file = temp_path / "test.lz4"
     with open(text_file, "rb") as f:
@@ -88,9 +79,9 @@ async def setup_after_prep(self, module_test):
                 <a href="/test.bz2"/>
                 <a href="/test.xz"/>
                 <a href="/test.7z"/>
+                <a href="/test.rar"/>
                 <a href="/test.lzma"/>
                 <a href="/test.tar"/>
-                <a href="/test.zst"/>
                 <a href="/test.lz4"/>
                 <a href="/test.tgz"/>"""
             ),
@@ -123,25 +114,18 @@ async def setup_after_prep(self, module_test):
                 headers={"Content-Type": "application/x-7z-compressed"},
             ),
         ),
-        # module_test.set_expect_requests(
-        #     dict(uri="/test.rar"),
-        #     dict(
-        #         response_data=self.rar_file.read_bytes(),
-        #         headers={"Content-Type": "application/vnd.rar"},
-        #     ),
-        # ),
         module_test.set_expect_requests(
-            dict(uri="/test.lzma"),
+            dict(uri="/test.rar"),
             dict(
-                response_data=self.lzma_file.read_bytes(),
-                headers={"Content-Type": "application/x-lzma"},
+                response_data=self.rar_file.read_bytes(),
+                headers={"Content-Type": "application/vnd.rar"},
             ),
         ),
         module_test.set_expect_requests(
-            dict(uri="/test.zst"),
+            dict(uri="/test.lzma"),
             dict(
-                response_data=self.zstd_file.read_bytes(),
-                headers={"Content-Type": "application/zstd"},
+                response_data=self.lzma_file.read_bytes(),
+                headers={"Content-Type": "application/x-lzma"},
             ),
         ),
         module_test.set_expect_requests(
@@ -210,14 +194,14 @@ def check(self, module_test, events):
         assert extract_path.is_dir(), "Destination folder doesn't exist"
 
         # RAR
-        # rar_file_event = [e for e in filesystem_events if "test.rar" in e.data["path"]]
-        # assert 1 == len(rar_file_event), "No rar file found"
-        # file = Path(rar_file_event[0].data["path"])
-        # assert file.is_file(), f"File not found at {file}"
-        # extract_event = [e for e in filesystem_events if "test_rar" in e.data["path"] and "folder" in e.tags]
-        # assert 1 == len(extract_event), "Failed to extract rar"
-        # extract_path = Path(extract_event[0].data["path"])
-        # assert extract_path.is_dir(), "Destination folder doesn't exist"
+        rar_file_event = [e for e in filesystem_events if "test.rar" in e.data["path"]]
+        assert 1 == len(rar_file_event), "No rar file found"
+        file = Path(rar_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test_rar" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract rar"
+        extract_path = Path(extract_event[0].data["path"])
+        assert extract_path.is_dir(), "Destination folder doesn't exist"
 
         # LZMA
         lzma_file_event = [e for e in filesystem_events if "test.lzma" in e.data["path"]]
@@ -229,16 +213,6 @@ def check(self, module_test, events):
         extract_path = Path(extract_event[0].data["path"])
         assert extract_path.is_dir(), "Destination folder doesn't exist"
 
-        # ZSTD
-        zstd_file_event = [e for e in filesystem_events if "test.zst" in e.data["path"]]
-        assert 1 == len(zstd_file_event), "No zstd file found"
-        file = Path(zstd_file_event[0].data["path"])
-        assert file.is_file(), f"File not found at {file}"
-        extract_event = [e for e in filesystem_events if "test_zst" in e.data["path"] and "folder" in e.tags]
-        assert 1 == len(extract_event), "Failed to extract zstd"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination folder doesn't exist"
-
         # LZ4
         lz4_file_event = [e for e in filesystem_events if "test.lz4" in e.data["path"]]
         assert 1 == len(lz4_file_event), "No lz4 file found"

From 6c6a51181a4cd4e3f0da9b43b9350d63d345103f Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sun, 8 Dec 2024 15:56:14 +0000
Subject: [PATCH 09/29] Made changes to the tests

---
 bbot/modules/filedownload.py                  |   9 +-
 bbot/modules/internal/extract.py              |  11 +-
 .../module_tests/test_module_extract.py       | 166 +++++++-----------
 3 files changed, 74 insertions(+), 112 deletions(-)

diff --git a/bbot/modules/filedownload.py b/bbot/modules/filedownload.py
index 34d616a894..23d0e2c1a6 100644
--- a/bbot/modules/filedownload.py
+++ b/bbot/modules/filedownload.py
@@ -75,12 +75,11 @@ class filedownload(BaseModule):
             "yaml",  #  YAML Ain't Markup Language
             "yml",  #  YAML Ain't Markup Language
             "zip",  #  Zip Archive
-            "bz2",  #  Bzip2 Compressed File
-            "xz",  #  XZ Compressed File
-            "7z",  #  7-Zip Compressed File
             "lzma",  #  LZMA Compressed File
-            "zst",  #  Zstandard Compressed File
-            "lz4",  #  LZ4 Compressed File
+            "rar",  #  RAR Compressed File
+            "7z",  #  7-Zip Compressed File
+            "xz",  #  XZ Compressed File
+            "bz2",  #  Bzip2 Compressed File
         ],
         "max_filesize": "10MB",
         "base_64_encoded_file": "false",
diff --git a/bbot/modules/internal/extract.py b/bbot/modules/internal/extract.py
index af08f940f6..259e038222 100644
--- a/bbot/modules/internal/extract.py
+++ b/bbot/modules/internal/extract.py
@@ -9,10 +9,10 @@ class extract(BaseInternalModule):
     flags = ["passive"]
     meta = {
         "description": "Extract different types of files into folders on the filesystem",
-        "created_date": "2024-11-04",
+        "created_date": "2024-12-08",
         "author": "@domwhewell-sage",
     }
-    deps_apt = ["7zip", "tar", "unrar", "gunzip", "zstd", "lz4"]
+    deps_apt = ["7zip", "tar", "rar", "unrar", "gunzip"]
 
     async def setup(self):
         self.compression_methods = {
@@ -22,10 +22,8 @@ async def setup(self):
             "7z": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
             "rar": ["unrar", "x", "-o+", "-p-", "{filename}", "{extract_dir}/"],
             "lzma": ["tar", "--overwrite", "--lzma", "-xvf", "{filename}", "-C", "{extract_dir}/"],
-            "lz4": ["lz4", "-d", "--force", "{filename}", "{extract_dir}/"],
             "tar": ["tar", "--overwrite", "-xvf", "{filename}", "-C", "{extract_dir}/"],
-            "tgz": ["tar", "--overwrite", "-xvzf", "{filename}", "-C", "{extract_dir}/"],
-            "gzip": ["gunzip", "--force", "--keep", "{filename}"],
+            "gzip": ["tar", "--overwrite", "-xvzf", "{filename}", "-C", "{extract_dir}/"],
         }
         return True
 
@@ -66,12 +64,11 @@ async def extract_file(self, path, output_dir):
         if cmd_list:
             command = [s.format(filename=path, extract_dir=output_dir) for s in cmd_list]
             try:
-                output = await self.run_process(command, check=True)
+                await self.run_process(command, check=True)
                 for item in output_dir.iterdir():
                     if item.is_file():
                         await self.extract_file(item, output_dir / item.stem)
             except Exception as e:
                 self.warning(f"Error extracting {path}. Error: {e}")
-                self.warning(output)
                 return False
             return True
diff --git a/bbot/test/test_step_2/module_tests/test_module_extract.py b/bbot/test/test_step_2/module_tests/test_module_extract.py
index e76b5fdb14..15d1e785b2 100644
--- a/bbot/test/test_step_2/module_tests/test_module_extract.py
+++ b/bbot/test/test_step_2/module_tests/test_module_extract.py
@@ -1,11 +1,4 @@
-import zipfile
-import bz2
-import lzma
-import tarfile
-
-import py7zr
-from librar import archive
-import lz4.frame
+import subprocess
 
 from pathlib import Path
 from .base import ModuleTestBase
@@ -20,70 +13,43 @@ class TestExtract(ModuleTestBase):
     text_file = temp_path / "test.txt"
     with open(text_file, "w") as f:
         f.write("This is a test file")
-
-    # ZIP
     zip_file = temp_path / "test.zip"
-    with zipfile.ZipFile(zip_file, "w") as z:
-        z.write(text_file, "test.txt")
-
-    # BZ2
+    zip_zip_file = temp_path / "test_zip.zip"
     bz2_file = temp_path / "test.bz2"
-    with bz2.BZ2File(bz2_file, "wb") as b:
-        with open(text_file, "rb") as f:
-            b.write(f.read())
-
-    # XZ
     xz_file = temp_path / "test.xz"
-    with lzma.open(xz_file, "wb") as x:
-        with open(text_file, "rb") as f:
-            x.write(f.read())
-
-    # 7Z
-    seven_z_file = temp_path / "test.7z"
-    with py7zr.SevenZipFile(seven_z_file, "w") as z:
-        z.write(text_file, "test.txt")
-
-    # RAR
+    zip7_file = temp_path / "test.7z"
     rar_file = temp_path / "test.rar"
-    with archive.Archive(rar_file, base) as r:
-        r.write(text_file, "test.txt")
-
-    # LZMA
     lzma_file = temp_path / "test.lzma"
-    with lzma.open(lzma_file, "wb") as l:
-        with open(text_file, "rb") as f:
-            l.write(f.read())
-
-    # TAR
     tar_file = temp_path / "test.tar"
-    with tarfile.open(tar_file, "w") as t:
-        t.add(text_file, arcname="test.txt")
-
-    # LZ4
-    lz4_file = temp_path / "test.lz4"
-    with open(text_file, "rb") as f:
-        content = f.read()
-        with lz4.frame.open(lz4_file, "wb") as l:
-            l.write(content)
-
-    # TAR.GZ
     tgz_file = temp_path / "test.tgz"
-    with tarfile.open(tgz_file, "w:gz") as t:
-        t.add(text_file, arcname="test.txt")
+    commands = [
+        ("7z", "a", '-p""', "-aoa", f"{zip_file}", f"{text_file}"),
+        ("7z", "a", '-p""', "-aoa", f"{zip_zip_file}", f"{zip_file}"),
+        ("tar", "-C", f"{temp_path}", "-cvjf", f"{bz2_file}", f"{text_file.name}"),
+        ("tar", "-C", f"{temp_path}", "-cvJf", f"{xz_file}", f"{text_file.name}"),
+        ("7z", "a", '-p""', "-aoa", f"{zip7_file}", f"{text_file}"),
+        ("rar", "a", f"{rar_file}", f"{text_file}"),
+        ("tar", "-C", f"{temp_path}", "--lzma", "-cvf", f"{lzma_file}", f"{text_file.name}"),
+        ("tar", "-C", f"{temp_path}", "-cvf", f"{tar_file}", f"{text_file.name}"),
+        ("tar", "-C", f"{temp_path}", "-cvzf", f"{tgz_file}", f"{text_file.name}"),
+    ]
+
+    for command in commands:
+        subprocess.run(command, check=True)
 
     async def setup_after_prep(self, module_test):
         module_test.set_expect_requests(
             dict(uri="/"),
             dict(
-                response_data="""<a href="/test.zip"/>
-                <a href="/test.bz2"/>
-                <a href="/test.xz"/>
-                <a href="/test.7z"/>
-                <a href="/test.rar"/>
-                <a href="/test.lzma"/>
-                <a href="/test.tar"/>
-                <a href="/test.lz4"/>
-                <a href="/test.tgz"/>"""
+                response_data="""<a href="/test.zip">
+                <a href="/test-zip.zip">
+                <a href="/test.bz2">
+                <a href="/test.xz">
+                <a href="/test.7z">
+                <a href="/test.rar">
+                <a href="/test.lzma">
+                <a href="/test.tar">
+                <a href="/test.tgz">""",
             ),
         )
         module_test.set_expect_requests(
@@ -93,6 +59,13 @@ async def setup_after_prep(self, module_test):
                 headers={"Content-Type": "application/zip"},
             ),
         ),
+        module_test.set_expect_requests(
+            dict(uri="/test-zip.zip"),
+            dict(
+                response_data=self.zip_zip_file.read_bytes(),
+                headers={"Content-Type": "application/zip"},
+            ),
+        ),
         module_test.set_expect_requests(
             dict(uri="/test.bz2"),
             dict(
@@ -110,14 +83,14 @@ async def setup_after_prep(self, module_test):
         module_test.set_expect_requests(
             dict(uri="/test.7z"),
             dict(
-                response_data=self.seven_z_file.read_bytes(),
+                response_data=self.zip7_file.read_bytes(),
                 headers={"Content-Type": "application/x-7z-compressed"},
             ),
         ),
         module_test.set_expect_requests(
             dict(uri="/test.rar"),
             dict(
-                response_data=self.rar_file.read_bytes(),
+                response_data=self.zip7_file.read_bytes(),
                 headers={"Content-Type": "application/vnd.rar"},
             ),
         ),
@@ -128,13 +101,6 @@ async def setup_after_prep(self, module_test):
                 headers={"Content-Type": "application/x-lzma"},
             ),
         ),
-        module_test.set_expect_requests(
-            dict(uri="/test.lz4"),
-            dict(
-                response_data=self.lz4_file.read_bytes(),
-                headers={"Content-Type": "application/x-lz4"},
-            ),
-        ),
         module_test.set_expect_requests(
             dict(uri="/test.tar"),
             dict(
@@ -160,8 +126,18 @@ def check(self, module_test, events):
         assert file.is_file(), f"File not found at {file}"
         extract_event = [e for e in filesystem_events if "test_zip" in e.data["path"] and "folder" in e.tags]
         assert 1 == len(extract_event), "Failed to extract zip"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination folder doesn't exist"
+        extract_path = Path(extract_event[0].data["path"]) / "test.txt"
+        assert extract_path.is_file(), "Failed to extract the test file"
+
+        # Recursive ZIP
+        zip_zip_file_event = [e for e in filesystem_events if "test-zip.zip" in e.data["path"]]
+        assert 1 == len(zip_zip_file_event), "No recursive file found"
+        file = Path(zip_zip_file_event[0].data["path"])
+        assert file.is_file(), f"File not found at {file}"
+        extract_event = [e for e in filesystem_events if "test-zip_zip" in e.data["path"] and "folder" in e.tags]
+        assert 1 == len(extract_event), "Failed to extract zip"
+        extract_path = Path(extract_event[0].data["path"]) / "test" / "test.txt"
+        assert extract_path.is_file(), "Failed to extract the test file"
 
         # BZ2
         bz2_file_event = [e for e in filesystem_events if "test.bz2" in e.data["path"]]
@@ -170,8 +146,8 @@ def check(self, module_test, events):
         assert file.is_file(), f"File not found at {file}"
         extract_event = [e for e in filesystem_events if "test_bz2" in e.data["path"] and "folder" in e.tags]
         assert 1 == len(extract_event), "Failed to extract bz2"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination folder doesn't exist"
+        extract_path = Path(extract_event[0].data["path"]) / "test.txt"
+        assert extract_path.is_file(), "Failed to extract the test file"
 
         # XZ
         xz_file_event = [e for e in filesystem_events if "test.xz" in e.data["path"]]
@@ -180,18 +156,18 @@ def check(self, module_test, events):
         assert file.is_file(), f"File not found at {file}"
         extract_event = [e for e in filesystem_events if "test_xz" in e.data["path"] and "folder" in e.tags]
         assert 1 == len(extract_event), "Failed to extract xz"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination folder doesn't exist"
+        extract_path = Path(extract_event[0].data["path"]) / "test.txt"
+        assert extract_path.is_file(), "Failed to extract the test file"
 
-        # 7Z
-        seven_z_file_event = [e for e in filesystem_events if "test.7z" in e.data["path"]]
-        assert 1 == len(seven_z_file_event), "No 7z file found"
-        file = Path(seven_z_file_event[0].data["path"])
+        # 7z
+        zip7_file_event = [e for e in filesystem_events if "test.7z" in e.data["path"]]
+        assert 1 == len(zip7_file_event), "No 7z file found"
+        file = Path(zip7_file_event[0].data["path"])
         assert file.is_file(), f"File not found at {file}"
         extract_event = [e for e in filesystem_events if "test_7z" in e.data["path"] and "folder" in e.tags]
         assert 1 == len(extract_event), "Failed to extract 7z"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination folder doesn't exist"
+        extract_path = Path(extract_event[0].data["path"]) / "test.txt"
+        assert extract_path.is_file(), "Failed to extract the test file"
 
         # RAR
         rar_file_event = [e for e in filesystem_events if "test.rar" in e.data["path"]]
@@ -200,8 +176,8 @@ def check(self, module_test, events):
         assert file.is_file(), f"File not found at {file}"
         extract_event = [e for e in filesystem_events if "test_rar" in e.data["path"] and "folder" in e.tags]
         assert 1 == len(extract_event), "Failed to extract rar"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination folder doesn't exist"
+        extract_path = Path(extract_event[0].data["path"]) / "test.txt"
+        assert extract_path.is_file(), "Failed to extract the test file"
 
         # LZMA
         lzma_file_event = [e for e in filesystem_events if "test.lzma" in e.data["path"]]
@@ -210,18 +186,8 @@ def check(self, module_test, events):
         assert file.is_file(), f"File not found at {file}"
         extract_event = [e for e in filesystem_events if "test_lzma" in e.data["path"] and "folder" in e.tags]
         assert 1 == len(extract_event), "Failed to extract lzma"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination folder doesn't exist"
-
-        # LZ4
-        lz4_file_event = [e for e in filesystem_events if "test.lz4" in e.data["path"]]
-        assert 1 == len(lz4_file_event), "No lz4 file found"
-        file = Path(lz4_file_event[0].data["path"])
-        assert file.is_file(), f"File not found at {file}"
-        extract_event = [e for e in filesystem_events if "test_lz4" in e.data["path"] and "folder" in e.tags]
-        assert 1 == len(extract_event), "Failed to extract lz4"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination folder doesn't exist"
+        extract_path = Path(extract_event[0].data["path"]) / "test.txt"
+        assert extract_path.is_file(), "Failed to extract the test file"
 
         # TAR
         tar_file_event = [e for e in filesystem_events if "test.tar" in e.data["path"]]
@@ -230,15 +196,15 @@ def check(self, module_test, events):
         assert file.is_file(), f"File not found at {file}"
         extract_event = [e for e in filesystem_events if "test_tar" in e.data["path"] and "folder" in e.tags]
         assert 1 == len(extract_event), "Failed to extract tar"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination folder doesn't exist"
+        extract_path = Path(extract_event[0].data["path"]) / "test.txt"
+        assert extract_path.is_file(), "Failed to extract the test file"
 
-        # TAR.GZ
+        # TGZ
         tgz_file_event = [e for e in filesystem_events if "test.tgz" in e.data["path"]]
         assert 1 == len(tgz_file_event), "No tgz file found"
         file = Path(tgz_file_event[0].data["path"])
         assert file.is_file(), f"File not found at {file}"
         extract_event = [e for e in filesystem_events if "test_tgz" in e.data["path"] and "folder" in e.tags]
         assert 1 == len(extract_event), "Failed to extract tgz"
-        extract_path = Path(extract_event[0].data["path"])
-        assert extract_path.is_dir(), "Destination folder doesn't exist"
+        extract_path = Path(extract_event[0].data["path"]) / "test.txt"
+        assert extract_path.is_file(), "Failed to extract the test file"

From b71841a170268fd6de95bf40cd76dddbbc827d66 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sun, 8 Dec 2024 15:58:49 +0000
Subject: [PATCH 10/29] Remove jadx compatable types from compression map

---
 bbot/core/helpers/libmagic.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bbot/core/helpers/libmagic.py b/bbot/core/helpers/libmagic.py
index 5e1279d9c7..adbd676bcb 100644
--- a/bbot/core/helpers/libmagic.py
+++ b/bbot/core/helpers/libmagic.py
@@ -21,9 +21,7 @@ def get_compression(mime_type):
         "application/fictionbook2+zip": "zip",  # FictionBook 2.0 (Zip)
         "application/fictionbook3+zip": "zip",  # FictionBook 3.0 (Zip)
         "application/gzip": "gzip",  # Gzip compressed file
-        "application/java-archive": "zip",  # Java Archive (JAR)
         "application/pak": "pak",  # PAK archive
-        "application/vnd.android.package-archive": "zip",  # Android package (APK)
         "application/vnd.comicbook-rar": "rar",  # Comic book archive (RAR)
         "application/vnd.comicbook+zip": "zip",  # Comic book archive (Zip)
         "application/vnd.ms-cab-compressed": "cab",  # Microsoft Cabinet archive

From 7db38fd334263a4d7d4cb51e8030f2fb65a07ed6 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sun, 15 Dec 2024 11:26:37 +0000
Subject: [PATCH 11/29] Rename to unarchive, move jar exclusions into module
 and restore helper

---
 bbot/core/helpers/libmagic.py                 |   2 +
 .../internal/{extract.py => unarchive.py}     |   7 +-
 bbot/test/test_step_1/test_cli.py             |   6 +-
 bbot/test/test_step_1/test_presets.py         |   4 +-
 ...le_extract.py => test_module_unarchive.py} | 130 ++++++++++--------
 5 files changed, 86 insertions(+), 63 deletions(-)
 rename bbot/modules/internal/{extract.py => unarchive.py} (89%)
 rename bbot/test/test_step_2/module_tests/{test_module_extract.py => test_module_unarchive.py} (75%)

diff --git a/bbot/core/helpers/libmagic.py b/bbot/core/helpers/libmagic.py
index 535c99c8cb..37612f558e 100644
--- a/bbot/core/helpers/libmagic.py
+++ b/bbot/core/helpers/libmagic.py
@@ -20,7 +20,9 @@ def get_compression(mime_type):
         "application/fictionbook2+zip": "zip",  # FictionBook 2.0 (Zip)
         "application/fictionbook3+zip": "zip",  # FictionBook 3.0 (Zip)
         "application/gzip": "gzip",  # Gzip compressed file
+        "application/java-archive": "zip",  # Java Archive (JAR)
         "application/pak": "pak",  # PAK archive
+        "application/vnd.android.package-archive": "zip",  # Android package (APK)
         "application/vnd.comicbook-rar": "rar",  # Comic book archive (RAR)
         "application/vnd.comicbook+zip": "zip",  # Comic book archive (Zip)
         "application/vnd.ms-cab-compressed": "cab",  # Microsoft Cabinet archive
diff --git a/bbot/modules/internal/extract.py b/bbot/modules/internal/unarchive.py
similarity index 89%
rename from bbot/modules/internal/extract.py
rename to bbot/modules/internal/unarchive.py
index 259e038222..77ad2e2dee 100644
--- a/bbot/modules/internal/extract.py
+++ b/bbot/modules/internal/unarchive.py
@@ -3,10 +3,10 @@
 from bbot.core.helpers.libmagic import get_magic_info, get_compression
 
 
-class extract(BaseInternalModule):
+class unarchive(BaseInternalModule):
     watched_events = ["FILESYSTEM"]
     produced_events = ["FILESYSTEM"]
-    flags = ["passive"]
+    flags = ["passive", "safe"]
     meta = {
         "description": "Extract different types of files into folders on the filesystem",
         "created_date": "2024-12-08",
@@ -15,6 +15,7 @@ class extract(BaseInternalModule):
     deps_apt = ["7zip", "tar", "rar", "unrar", "gunzip"]
 
     async def setup(self):
+        self.ignore_compressions = ["application/java-archive", "application/vnd.android.package-archive"]
         self.compression_methods = {
             "zip": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
             "bzip2": ["tar", "--overwrite", "-xvjf", "{filename}", "-C", "{extract_dir}/"],
@@ -29,6 +30,8 @@ async def setup(self):
 
     async def filter_event(self, event):
         if "file" in event.tags:
+            if event.data["magic_mime_type"] in self.ignore_compressions:
+                return False, f"Ignoring file type: {event.data['magic_mime_type']}, {event.data['path']}"
             if not event.data["compression"] in self.compression_methods:
                 return False, f"Extract unable to handle file type: {event.data['compression']}, {event.data['path']}"
         else:
diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py
index 26aca10647..07fb4747a4 100644
--- a/bbot/test/test_step_1/test_cli.py
+++ b/bbot/test/test_step_1/test_cli.py
@@ -326,17 +326,17 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config):
     monkeypatch.setattr("sys.argv", ["bbot", "-y"])
     result = await cli._main()
     assert result is True
-    assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,extract,speculate)" in caplog.text
+    assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,unarchive,speculate)" in caplog.text
     caplog.clear()
     monkeypatch.setattr("sys.argv", ["bbot", "-em", "excavate", "speculate", "-y"])
     result = await cli._main()
     assert result is True
-    assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,extract)" in caplog.text
+    assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,unarchive)" in caplog.text
     caplog.clear()
     monkeypatch.setattr("sys.argv", ["bbot", "-c", "speculate=false", "-y"])
     result = await cli._main()
     assert result is True
-    assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,extract)" in caplog.text
+    assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,unarchive)" in caplog.text
 
     # custom target type
     out, err = capsys.readouterr()
diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py
index 43f571e13e..3ac076b067 100644
--- a/bbot/test/test_step_1/test_presets.py
+++ b/bbot/test/test_step_1/test_presets.py
@@ -496,7 +496,7 @@ def test_preset_module_resolution(clean_default_config):
     assert set(preset.internal_modules) == {
         "aggregate",
         "excavate",
-        "extract",
+        "unarchive",
         "speculate",
         "cloudcheck",
         "dnsresolve",
@@ -560,7 +560,7 @@ def test_preset_module_resolution(clean_default_config):
         "dnsresolve",
         "aggregate",
         "excavate",
-        "extract",
+        "unarchive",
         "txt",
         "httpx",
         "csv",
diff --git a/bbot/test/test_step_2/module_tests/test_module_extract.py b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
similarity index 75%
rename from bbot/test/test_step_2/module_tests/test_module_extract.py
rename to bbot/test/test_step_2/module_tests/test_module_unarchive.py
index 15d1e785b2..ca40e9e16c 100644
--- a/bbot/test/test_step_2/module_tests/test_module_extract.py
+++ b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
@@ -4,9 +4,9 @@
 from .base import ModuleTestBase
 
 
-class TestExtract(ModuleTestBase):
+class TestUnarchive(ModuleTestBase):
     targets = ["http://127.0.0.1:8888"]
-    modules_overrides = ["filedownload", "httpx", "excavate", "speculate", "extract"]
+    modules_overrides = ["filedownload", "httpx", "excavate", "speculate", "unarchive"]
     temp_path = Path("/tmp/.bbot_test")
 
     # Create a text file to compress
@@ -52,69 +52,87 @@ async def setup_after_prep(self, module_test):
                 <a href="/test.tgz">""",
             ),
         )
-        module_test.set_expect_requests(
-            dict(uri="/test.zip"),
-            dict(
-                response_data=self.zip_file.read_bytes(),
-                headers={"Content-Type": "application/zip"},
+        (
+            module_test.set_expect_requests(
+                dict(uri="/test.zip"),
+                dict(
+                    response_data=self.zip_file.read_bytes(),
+                    headers={"Content-Type": "application/zip"},
+                ),
             ),
-        ),
-        module_test.set_expect_requests(
-            dict(uri="/test-zip.zip"),
-            dict(
-                response_data=self.zip_zip_file.read_bytes(),
-                headers={"Content-Type": "application/zip"},
+        )
+        (
+            module_test.set_expect_requests(
+                dict(uri="/test-zip.zip"),
+                dict(
+                    response_data=self.zip_zip_file.read_bytes(),
+                    headers={"Content-Type": "application/zip"},
+                ),
             ),
-        ),
-        module_test.set_expect_requests(
-            dict(uri="/test.bz2"),
-            dict(
-                response_data=self.bz2_file.read_bytes(),
-                headers={"Content-Type": "application/x-bzip2"},
+        )
+        (
+            module_test.set_expect_requests(
+                dict(uri="/test.bz2"),
+                dict(
+                    response_data=self.bz2_file.read_bytes(),
+                    headers={"Content-Type": "application/x-bzip2"},
+                ),
             ),
-        ),
-        module_test.set_expect_requests(
-            dict(uri="/test.xz"),
-            dict(
-                response_data=self.xz_file.read_bytes(),
-                headers={"Content-Type": "application/x-xz"},
+        )
+        (
+            module_test.set_expect_requests(
+                dict(uri="/test.xz"),
+                dict(
+                    response_data=self.xz_file.read_bytes(),
+                    headers={"Content-Type": "application/x-xz"},
+                ),
             ),
-        ),
-        module_test.set_expect_requests(
-            dict(uri="/test.7z"),
-            dict(
-                response_data=self.zip7_file.read_bytes(),
-                headers={"Content-Type": "application/x-7z-compressed"},
+        )
+        (
+            module_test.set_expect_requests(
+                dict(uri="/test.7z"),
+                dict(
+                    response_data=self.zip7_file.read_bytes(),
+                    headers={"Content-Type": "application/x-7z-compressed"},
+                ),
             ),
-        ),
-        module_test.set_expect_requests(
-            dict(uri="/test.rar"),
-            dict(
-                response_data=self.zip7_file.read_bytes(),
-                headers={"Content-Type": "application/vnd.rar"},
+        )
+        (
+            module_test.set_expect_requests(
+                dict(uri="/test.rar"),
+                dict(
+                    response_data=self.zip7_file.read_bytes(),
+                    headers={"Content-Type": "application/vnd.rar"},
+                ),
             ),
-        ),
-        module_test.set_expect_requests(
-            dict(uri="/test.lzma"),
-            dict(
-                response_data=self.lzma_file.read_bytes(),
-                headers={"Content-Type": "application/x-lzma"},
+        )
+        (
+            module_test.set_expect_requests(
+                dict(uri="/test.lzma"),
+                dict(
+                    response_data=self.lzma_file.read_bytes(),
+                    headers={"Content-Type": "application/x-lzma"},
+                ),
             ),
-        ),
-        module_test.set_expect_requests(
-            dict(uri="/test.tar"),
-            dict(
-                response_data=self.tar_file.read_bytes(),
-                headers={"Content-Type": "application/x-tar"},
+        )
+        (
+            module_test.set_expect_requests(
+                dict(uri="/test.tar"),
+                dict(
+                    response_data=self.tar_file.read_bytes(),
+                    headers={"Content-Type": "application/x-tar"},
+                ),
             ),
-        ),
-        module_test.set_expect_requests(
-            dict(uri="/test.tgz"),
-            dict(
-                response_data=self.tgz_file.read_bytes(),
-                headers={"Content-Type": "application/x-tgz"},
+        )
+        (
+            module_test.set_expect_requests(
+                dict(uri="/test.tgz"),
+                dict(
+                    response_data=self.tgz_file.read_bytes(),
+                    headers={"Content-Type": "application/x-tgz"},
+                ),
             ),
-        ),
+        )
 
     def check(self, module_test, events):
         filesystem_events = [e for e in events if e.type == "FILESYSTEM"]

From 892663d89be114783c0c3594dcd2142afc276534 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Thu, 19 Dec 2024 21:11:27 +0000
Subject: [PATCH 12/29] Change lzma to 7zip

---
 bbot/modules/internal/unarchive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bbot/modules/internal/unarchive.py b/bbot/modules/internal/unarchive.py
index 77ad2e2dee..253a345f47 100644
--- a/bbot/modules/internal/unarchive.py
+++ b/bbot/modules/internal/unarchive.py
@@ -22,7 +22,7 @@ async def setup(self):
             "xz": ["tar", "--overwrite", "-xvJf", "{filename}", "-C", "{extract_dir}/"],
             "7z": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
             "rar": ["unrar", "x", "-o+", "-p-", "{filename}", "{extract_dir}/"],
-            "lzma": ["tar", "--overwrite", "--lzma", "-xvf", "{filename}", "-C", "{extract_dir}/"],
+            "lzma": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
             "tar": ["tar", "--overwrite", "-xvf", "{filename}", "-C", "{extract_dir}/"],
             "gzip": ["tar", "--overwrite", "-xvzf", "{filename}", "-C", "{extract_dir}/"],
         }

From 3dfe07b4aa3edfbdc1171e31d2cb5d151b465cec Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Fri, 20 Dec 2024 18:55:22 +0000
Subject: [PATCH 13/29] Remove apt_deps

---
 bbot/modules/internal/unarchive.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bbot/modules/internal/unarchive.py b/bbot/modules/internal/unarchive.py
index 253a345f47..f3fb980a68 100644
--- a/bbot/modules/internal/unarchive.py
+++ b/bbot/modules/internal/unarchive.py
@@ -12,7 +12,6 @@ class unarchive(BaseInternalModule):
         "created_date": "2024-12-08",
         "author": "@domwhewell-sage",
     }
-    deps_apt = ["7zip", "tar", "rar", "unrar", "gunzip"]
 
     async def setup(self):
         self.ignore_compressions = ["application/java-archive", "application/vnd.android.package-archive"]

From 12c68fbf238136b3ca0aa27667951f392702836f Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Fri, 20 Dec 2024 20:04:18 +0000
Subject: [PATCH 14/29] Move file creation to setup_after_prep

---
 .../module_tests/test_module_unarchive.py     | 83 ++++++++++---------
 1 file changed, 44 insertions(+), 39 deletions(-)

diff --git a/bbot/test/test_step_2/module_tests/test_module_unarchive.py b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
index ca40e9e16c..83449556ec 100644
--- a/bbot/test/test_step_2/module_tests/test_module_unarchive.py
+++ b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
@@ -1,4 +1,4 @@
-import subprocess
+import asyncio
 
 from pathlib import Path
 from .base import ModuleTestBase
@@ -7,37 +7,42 @@
 class TestUnarchive(ModuleTestBase):
     targets = ["http://127.0.0.1:8888"]
     modules_overrides = ["filedownload", "httpx", "excavate", "speculate", "unarchive"]
-    temp_path = Path("/tmp/.bbot_test")
-
-    # Create a text file to compress
-    text_file = temp_path / "test.txt"
-    with open(text_file, "w") as f:
-        f.write("This is a test file")
-    zip_file = temp_path / "test.zip"
-    zip_zip_file = temp_path / "test_zip.zip"
-    bz2_file = temp_path / "test.bz2"
-    xz_file = temp_path / "test.xz"
-    zip7_file = temp_path / "test.7z"
-    rar_file = temp_path / "test.rar"
-    lzma_file = temp_path / "test.lzma"
-    tar_file = temp_path / "test.tar"
-    tgz_file = temp_path / "test.tgz"
-    commands = [
-        ("7z", "a", '-p""', "-aoa", f"{zip_file}", f"{text_file}"),
-        ("7z", "a", '-p""', "-aoa", f"{zip_zip_file}", f"{zip_file}"),
-        ("tar", "-C", f"{temp_path}", "-cvjf", f"{bz2_file}", f"{text_file.name}"),
-        ("tar", "-C", f"{temp_path}", "-cvJf", f"{xz_file}", f"{text_file.name}"),
-        ("7z", "a", '-p""', "-aoa", f"{zip7_file}", f"{text_file}"),
-        ("rar", "a", f"{rar_file}", f"{text_file}"),
-        ("tar", "-C", f"{temp_path}", "--lzma", "-cvf", f"{lzma_file}", f"{text_file.name}"),
-        ("tar", "-C", f"{temp_path}", "-cvf", f"{tar_file}", f"{text_file.name}"),
-        ("tar", "-C", f"{temp_path}", "-cvzf", f"{tgz_file}", f"{text_file.name}"),
-    ]
-
-    for command in commands:
-        subprocess.run(command, check=True)
 
     async def setup_after_prep(self, module_test):
+        temp_path = Path("/tmp/.bbot_test")
+
+        # Create a text file to compress
+        text_file = temp_path / "test.txt"
+        with open(text_file, "w") as f:
+            f.write("This is a test file")
+        zip_file = temp_path / "test.zip"
+        zip_zip_file = temp_path / "test_zip.zip"
+        bz2_file = temp_path / "test.bz2"
+        xz_file = temp_path / "test.xz"
+        zip7_file = temp_path / "test.7z"
+        rar_file = temp_path / "test.rar"
+        lzma_file = temp_path / "test.lzma"
+        tar_file = temp_path / "test.tar"
+        tgz_file = temp_path / "test.tgz"
+        commands = [
+            ("7z", "a", '-p""', "-aoa", f"{zip_file}", f"{text_file}"),
+            ("7z", "a", '-p""', "-aoa", f"{zip_zip_file}", f"{zip_file}"),
+            ("tar", "-C", f"{temp_path}", "-cvjf", f"{bz2_file}", f"{text_file.name}"),
+            ("tar", "-C", f"{temp_path}", "-cvJf", f"{xz_file}", f"{text_file.name}"),
+            ("7z", "a", '-p""', "-aoa", f"{zip7_file}", f"{text_file}"),
+            ("rar", "a", f"{rar_file}", f"{text_file}"),
+            ("tar", "-C", f"{temp_path}", "--lzma", "-cvf", f"{lzma_file}", f"{text_file.name}"),
+            ("tar", "-C", f"{temp_path}", "-cvf", f"{tar_file}", f"{text_file.name}"),
+            ("tar", "-C", f"{temp_path}", "-cvzf", f"{tgz_file}", f"{text_file.name}"),
+        ]
+
+        for command in commands:
+            process = await asyncio.create_subprocess_exec(
+                *command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+            )
+            stdout, stderr = await process.communicate()
+            assert process.returncode == 0, f"Command {command} failed with error: {stderr.decode()}"
+
         module_test.set_expect_requests(
             dict(uri="/"),
             dict(
@@ -56,7 +61,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.zip"),
                 dict(
-                    response_data=self.zip_file.read_bytes(),
+                    response_data=zip_file.read_bytes(),
                     headers={"Content-Type": "application/zip"},
                 ),
             ),
@@ -65,7 +70,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test-zip.zip"),
                 dict(
-                    response_data=self.zip_zip_file.read_bytes(),
+                    response_data=zip_zip_file.read_bytes(),
                     headers={"Content-Type": "application/zip"},
                 ),
             ),
@@ -74,7 +79,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.bz2"),
                 dict(
-                    response_data=self.bz2_file.read_bytes(),
+                    response_data=bz2_file.read_bytes(),
                     headers={"Content-Type": "application/x-bzip2"},
                 ),
             ),
@@ -83,7 +88,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.xz"),
                 dict(
-                    response_data=self.xz_file.read_bytes(),
+                    response_data=xz_file.read_bytes(),
                     headers={"Content-Type": "application/x-xz"},
                 ),
             ),
@@ -92,7 +97,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.7z"),
                 dict(
-                    response_data=self.zip7_file.read_bytes(),
+                    response_data=zip7_file.read_bytes(),
                     headers={"Content-Type": "application/x-7z-compressed"},
                 ),
             ),
@@ -101,7 +106,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.rar"),
                 dict(
-                    response_data=self.zip7_file.read_bytes(),
+                    response_data=zip7_file.read_bytes(),
                     headers={"Content-Type": "application/vnd.rar"},
                 ),
             ),
@@ -110,7 +115,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.lzma"),
                 dict(
-                    response_data=self.lzma_file.read_bytes(),
+                    response_data=lzma_file.read_bytes(),
                     headers={"Content-Type": "application/x-lzma"},
                 ),
             ),
@@ -119,7 +124,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.tar"),
                 dict(
-                    response_data=self.tar_file.read_bytes(),
+                    response_data=tar_file.read_bytes(),
                     headers={"Content-Type": "application/x-tar"},
                 ),
             ),
@@ -128,7 +133,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.tgz"),
                 dict(
-                    response_data=self.tgz_file.read_bytes(),
+                    response_data=tgz_file.read_bytes(),
                     headers={"Content-Type": "application/x-tgz"},
                 ),
             ),

From 0aa69b2ed26c1369b84e2463286fd799410e04f0 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Fri, 20 Dec 2024 20:19:51 +0000
Subject: [PATCH 15/29] Swap unarchive and speculate

---
 bbot/test/test_step_1/test_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bbot/test/test_step_1/test_cli.py b/bbot/test/test_step_1/test_cli.py
index b210bdcddd..c123905d3b 100644
--- a/bbot/test/test_step_1/test_cli.py
+++ b/bbot/test/test_step_1/test_cli.py
@@ -342,7 +342,7 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config):
     monkeypatch.setattr("sys.argv", ["bbot", "-y"])
     result = await cli._main()
     assert result is True
-    assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,unarchive,speculate)" in caplog.text
+    assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,speculate,unarchive)" in caplog.text
     caplog.clear()
     monkeypatch.setattr("sys.argv", ["bbot", "-em", "excavate", "speculate", "-y"])
     result = await cli._main()

From 4e22bb2d48d5c3ec83ac6f88106cc1d5c0ece1fb Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Fri, 20 Dec 2024 21:08:49 +0000
Subject: [PATCH 16/29] Add rar to the `CORE_DEPS`

---
 bbot/core/helpers/depsinstaller/installer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bbot/core/helpers/depsinstaller/installer.py b/bbot/core/helpers/depsinstaller/installer.py
index 48d2f970fa..732f0690a1 100644
--- a/bbot/core/helpers/depsinstaller/installer.py
+++ b/bbot/core/helpers/depsinstaller/installer.py
@@ -31,6 +31,7 @@ class DepsInstaller:
         "gcc": "gcc",
         "bash": "bash",
         "which": "which",
+        "rar": "rar",
         "unrar": "unrar-free",
         "tar": "tar",
         # debian why are you like this

From a4a3712c860c61170e062852584ad03398c9a5a4 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Wed, 1 Jan 2025 19:32:38 +0000
Subject: [PATCH 17/29] Remove rar install and move it to /test

---
 bbot/core/helpers/depsinstaller/installer.py       |   1 -
 bbot/test/bbot_fixtures.py                         |   7 +++++++
 bbot/test/test.rar                                 | Bin 0 -> 93 bytes
 .../module_tests/test_module_unarchive.py          |   7 +++----
 4 files changed, 10 insertions(+), 5 deletions(-)
 create mode 100644 bbot/test/test.rar

diff --git a/bbot/core/helpers/depsinstaller/installer.py b/bbot/core/helpers/depsinstaller/installer.py
index 732f0690a1..48d2f970fa 100644
--- a/bbot/core/helpers/depsinstaller/installer.py
+++ b/bbot/core/helpers/depsinstaller/installer.py
@@ -31,7 +31,6 @@ class DepsInstaller:
         "gcc": "gcc",
         "bash": "bash",
         "which": "which",
-        "rar": "rar",
         "unrar": "unrar-free",
         "tar": "tar",
         # debian why are you like this
diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py
index 070df6e9a3..1498da7eaf 100644
--- a/bbot/test/bbot_fixtures.py
+++ b/bbot/test/bbot_fixtures.py
@@ -49,6 +49,13 @@ def tempapkfile():
     return apk_file
 
 
+def temprarfile():
+    current_dir = Path(__file__).parent
+    with open(current_dir / "test.rar", "rb") as f:
+        rar_file = f.read()
+    return rar_file
+
+
 @pytest.fixture
 def clean_default_config(monkeypatch):
     clean_config = OmegaConf.merge(
diff --git a/bbot/test/test.rar b/bbot/test/test.rar
new file mode 100644
index 0000000000000000000000000000000000000000..c900503caa5b846c30c1b1717081d19a95dfb411
GIT binary patch
literal 93
zcmWGaEK-zWXJjy*wDl<$BP$yND<fk=1H&S}zGO8fX70%hERz|QG&7wPOk-<cVB{!C
tEiTb3sVL!M77m_Pnm+BbG+RhUX0ZYgB`Sa<71A<uQf14-0-0G^7yyc<8TSAH

literal 0
HcmV?d00001

diff --git a/bbot/test/test_step_2/module_tests/test_module_unarchive.py b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
index 83449556ec..2972429b0f 100644
--- a/bbot/test/test_step_2/module_tests/test_module_unarchive.py
+++ b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
@@ -1,7 +1,7 @@
 import asyncio
 
 from pathlib import Path
-from .base import ModuleTestBase
+from .base import ModuleTestBase, temprarfile
 
 
 class TestUnarchive(ModuleTestBase):
@@ -20,7 +20,6 @@ async def setup_after_prep(self, module_test):
         bz2_file = temp_path / "test.bz2"
         xz_file = temp_path / "test.xz"
         zip7_file = temp_path / "test.7z"
-        rar_file = temp_path / "test.rar"
         lzma_file = temp_path / "test.lzma"
         tar_file = temp_path / "test.tar"
         tgz_file = temp_path / "test.tgz"
@@ -30,7 +29,6 @@ async def setup_after_prep(self, module_test):
             ("tar", "-C", f"{temp_path}", "-cvjf", f"{bz2_file}", f"{text_file.name}"),
             ("tar", "-C", f"{temp_path}", "-cvJf", f"{xz_file}", f"{text_file.name}"),
             ("7z", "a", '-p""', "-aoa", f"{zip7_file}", f"{text_file}"),
-            ("rar", "a", f"{rar_file}", f"{text_file}"),
             ("tar", "-C", f"{temp_path}", "--lzma", "-cvf", f"{lzma_file}", f"{text_file.name}"),
             ("tar", "-C", f"{temp_path}", "-cvf", f"{tar_file}", f"{text_file.name}"),
             ("tar", "-C", f"{temp_path}", "-cvzf", f"{tgz_file}", f"{text_file.name}"),
@@ -42,6 +40,7 @@ async def setup_after_prep(self, module_test):
             )
             stdout, stderr = await process.communicate()
             assert process.returncode == 0, f"Command {command} failed with error: {stderr.decode()}"
+        rar_file = temprarfile()
 
         module_test.set_expect_requests(
             dict(uri="/"),
@@ -106,7 +105,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.rar"),
                 dict(
-                    response_data=zip7_file.read_bytes(),
+                    response_data=rar_file,
                     headers={"Content-Type": "application/vnd.rar"},
                 ),
             ),

From 7f3f2226f6421288acad6475d457d45939b80f82 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Mon, 6 Jan 2025 16:32:25 +0000
Subject: [PATCH 18/29] include the test.rar file in the python test

---
 bbot/test/bbot_fixtures.py                         |   7 -------
 bbot/test/test.rar                                 | Bin 93 -> 0 bytes
 .../module_tests/test_module_unarchive.py          |   5 ++---
 3 files changed, 2 insertions(+), 10 deletions(-)
 delete mode 100644 bbot/test/test.rar

diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py
index 1498da7eaf..070df6e9a3 100644
--- a/bbot/test/bbot_fixtures.py
+++ b/bbot/test/bbot_fixtures.py
@@ -49,13 +49,6 @@ def tempapkfile():
     return apk_file
 
 
-def temprarfile():
-    current_dir = Path(__file__).parent
-    with open(current_dir / "test.rar", "rb") as f:
-        rar_file = f.read()
-    return rar_file
-
-
 @pytest.fixture
 def clean_default_config(monkeypatch):
     clean_config = OmegaConf.merge(
diff --git a/bbot/test/test.rar b/bbot/test/test.rar
deleted file mode 100644
index c900503caa5b846c30c1b1717081d19a95dfb411..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 93
zcmWGaEK-zWXJjy*wDl<$BP$yND<fk=1H&S}zGO8fX70%hERz|QG&7wPOk-<cVB{!C
tEiTb3sVL!M77m_Pnm+BbG+RhUX0ZYgB`Sa<71A<uQf14-0-0G^7yyc<8TSAH

diff --git a/bbot/test/test_step_2/module_tests/test_module_unarchive.py b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
index 2972429b0f..6e68d4cf6c 100644
--- a/bbot/test/test_step_2/module_tests/test_module_unarchive.py
+++ b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
@@ -1,7 +1,7 @@
 import asyncio
 
 from pathlib import Path
-from .base import ModuleTestBase, temprarfile
+from .base import ModuleTestBase
 
 
 class TestUnarchive(ModuleTestBase):
@@ -40,7 +40,6 @@ async def setup_after_prep(self, module_test):
             )
             stdout, stderr = await process.communicate()
             assert process.returncode == 0, f"Command {command} failed with error: {stderr.decode()}"
-        rar_file = temprarfile()
 
         module_test.set_expect_requests(
             dict(uri="/"),
@@ -105,7 +104,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.rar"),
                 dict(
-                    response_data=rar_file,
+                    response_data=b"Rar!\x1a\x07\x01\x003\x92\xb5\xe5\n\x01\x05\x06\x00\x05\x01\x01\x80\x80\x00\xa2N\x8ec&\x02\x03\x0b\x93\x00\x04\x93\x00\xa4\x83\x02\xc9\x11f\x06\x80\x00\x01\x08test.txt\n\x03\x13S\x96ug\x96\xf3\x1b\x06This is a test file\x1dwVQ\x03\x05\x04\x00",
                     headers={"Content-Type": "application/vnd.rar"},
                 ),
             ),

From 5589a02e03c479b6da37c6414494a1ef1c2150fb Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sat, 11 Jan 2025 14:32:54 +0000
Subject: [PATCH 19/29] Dont create the directory without checking the
 compression type first

---
 bbot/modules/internal/unarchive.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bbot/modules/internal/unarchive.py b/bbot/modules/internal/unarchive.py
index f3fb980a68..bf7b118526 100644
--- a/bbot/modules/internal/unarchive.py
+++ b/bbot/modules/internal/unarchive.py
@@ -44,6 +44,7 @@ async def handle_event(self, event):
         # Use the appropriate extraction method based on the file type
         self.info(f"Extracting {path} to {output_dir}")
         success = await self.extract_file(path, output_dir)
+        output_dir.listdir()
 
         # If the extraction was successful, emit the event
         if success:
@@ -58,12 +59,12 @@ async def handle_event(self, event):
             output_dir.rmdir()
 
     async def extract_file(self, path, output_dir):
-        if not output_dir.exists():
-            self.helpers.mkdir(output_dir)
         extension, mime_type, description, confidence = get_magic_info(path)
         compression_format = get_compression(mime_type)
         cmd_list = self.compression_methods.get(compression_format, [])
         if cmd_list:
+            if not output_dir.exists():
+                self.helpers.mkdir(output_dir)
             command = [s.format(filename=path, extract_dir=output_dir) for s in cmd_list]
             try:
                 await self.run_process(command, check=True)

From 9a787688c7250751d4b6a446af1d783512ef19c9 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sat, 11 Jan 2025 14:33:22 +0000
Subject: [PATCH 20/29] List out the files in the rar folder to see why ubuntu
 is failing in github actions

---
 bbot/test/test_step_2/module_tests/test_module_unarchive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bbot/test/test_step_2/module_tests/test_module_unarchive.py b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
index 6e68d4cf6c..12473212d8 100644
--- a/bbot/test/test_step_2/module_tests/test_module_unarchive.py
+++ b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
@@ -198,7 +198,7 @@ def check(self, module_test, events):
         extract_event = [e for e in filesystem_events if "test_rar" in e.data["path"] and "folder" in e.tags]
         assert 1 == len(extract_event), "Failed to extract rar"
         extract_path = Path(extract_event[0].data["path"]) / "test.txt"
-        assert extract_path.is_file(), "Failed to extract the test file"
+        assert extract_path.is_file(), list(extract_path.iterdir())
 
         # LZMA
         lzma_file_event = [e for e in filesystem_events if "test.lzma" in e.data["path"]]

From 33408822a99cafdb23fe33287ea10bae989095db Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sat, 11 Jan 2025 14:35:50 +0000
Subject: [PATCH 21/29] List the parent folder

---
 bbot/test/test_step_2/module_tests/test_module_unarchive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bbot/test/test_step_2/module_tests/test_module_unarchive.py b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
index 12473212d8..d6894a3f46 100644
--- a/bbot/test/test_step_2/module_tests/test_module_unarchive.py
+++ b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
@@ -198,7 +198,7 @@ def check(self, module_test, events):
         extract_event = [e for e in filesystem_events if "test_rar" in e.data["path"] and "folder" in e.tags]
         assert 1 == len(extract_event), "Failed to extract rar"
         extract_path = Path(extract_event[0].data["path"]) / "test.txt"
-        assert extract_path.is_file(), list(extract_path.iterdir())
+        assert extract_path.is_file(), list(extract_path.parent.iterdir())
 
         # LZMA
         lzma_file_event = [e for e in filesystem_events if "test.lzma" in e.data["path"]]

From a50a77560140ef5e6ef492a73402b424b39c0435 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sat, 11 Jan 2025 15:18:29 +0000
Subject: [PATCH 22/29] Dont accept files that are not compressed

---
 bbot/modules/internal/unarchive.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/bbot/modules/internal/unarchive.py b/bbot/modules/internal/unarchive.py
index bf7b118526..9a5f604977 100644
--- a/bbot/modules/internal/unarchive.py
+++ b/bbot/modules/internal/unarchive.py
@@ -31,8 +31,11 @@ async def filter_event(self, event):
         if "file" in event.tags:
             if event.data["magic_mime_type"] in self.ignore_compressions:
                 return False, f"Ignoring file type: {event.data['magic_mime_type']}, {event.data['path']}"
-            if not event.data["compression"] in self.compression_methods:
-                return False, f"Extract unable to handle file type: {event.data['compression']}, {event.data['path']}"
+            if "compression" in event.data:
+                if not event.data["compression"] in self.compression_methods:
+                   return False, f"Extract unable to handle file type: {event.data['compression']}, {event.data['path']}"
+            else:
+                return False, f"Event is not a compressed file: {event.data['path']}"
         else:
             return False, "Event is not a file"
         return True

From 849924b37b776dce300cd433d439528dc210e192 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sat, 11 Jan 2025 15:59:35 +0000
Subject: [PATCH 23/29] We dont want trufflehog re-scanning folders it has
 already done

---
 bbot/modules/internal/unarchive.py | 2 +-
 bbot/modules/trufflehog.py         | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/bbot/modules/internal/unarchive.py b/bbot/modules/internal/unarchive.py
index 9a5f604977..5d27a4722b 100644
--- a/bbot/modules/internal/unarchive.py
+++ b/bbot/modules/internal/unarchive.py
@@ -54,7 +54,7 @@ async def handle_event(self, event):
             await self.emit_event(
                 {"path": str(output_dir)},
                 "FILESYSTEM",
-                tags="folder",
+                tags=["folder", "unarchived-folder"],
                 parent=event,
                 context=f'extracted "{path}" to: {output_dir}',
             )
diff --git a/bbot/modules/trufflehog.py b/bbot/modules/trufflehog.py
index 7b48f37d56..78ae972124 100644
--- a/bbot/modules/trufflehog.py
+++ b/bbot/modules/trufflehog.py
@@ -76,8 +76,8 @@ async def filter_event(self, event):
             else:
                 return False, "Deleted forks is not enabled"
         else:
-            if "parsed-folder" in event.tags:
-                return False, "Not accepting parsed-folder events"
+            if "unarchived-folder" in event.tags:
+                return False, "Not accepting unarchived-folder events"
         return True
 
     async def handle_event(self, event):

From b58288b8d2d782d2bf265d8df915b0c150b32d2a Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sun, 12 Jan 2025 16:47:32 +0000
Subject: [PATCH 24/29] Remove failing line

---
 bbot/modules/internal/unarchive.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bbot/modules/internal/unarchive.py b/bbot/modules/internal/unarchive.py
index 5d27a4722b..45fb3a23fe 100644
--- a/bbot/modules/internal/unarchive.py
+++ b/bbot/modules/internal/unarchive.py
@@ -47,7 +47,6 @@ async def handle_event(self, event):
         # Use the appropriate extraction method based on the file type
         self.info(f"Extracting {path} to {output_dir}")
         success = await self.extract_file(path, output_dir)
-        output_dir.listdir()
 
         # If the extraction was successful, emit the event
         if success:

From 91b71f58582493de5f830af40b90fe687575efd1 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sun, 12 Jan 2025 17:58:46 +0000
Subject: [PATCH 25/29] Try with an older rar file created on ubuntu:22.04

---
 bbot/test/test_step_2/module_tests/test_module_unarchive.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bbot/test/test_step_2/module_tests/test_module_unarchive.py b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
index d6894a3f46..98dd60ea8d 100644
--- a/bbot/test/test_step_2/module_tests/test_module_unarchive.py
+++ b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
@@ -104,7 +104,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.rar"),
                 dict(
-                    response_data=b"Rar!\x1a\x07\x01\x003\x92\xb5\xe5\n\x01\x05\x06\x00\x05\x01\x01\x80\x80\x00\xa2N\x8ec&\x02\x03\x0b\x93\x00\x04\x93\x00\xa4\x83\x02\xc9\x11f\x06\x80\x00\x01\x08test.txt\n\x03\x13S\x96ug\x96\xf3\x1b\x06This is a test file\x1dwVQ\x03\x05\x04\x00",
+                    response_data=b"Rar!\x1a\x07\x01\x003\x92\xb5\xe5\n\x01\x05\x06\x00\x05\x01\x01\x80\x80\x00\xcf\xdc\xc5 &\x02\x03\x0b\x94\x00\x04\x94\x00\xa4\x83\x02\x96\x1ai\xd0\x80\x00\x01\x08test.txt\n\x03\x13\xcf\x01\x84g\xc2\xb6\xa6\x12This is a test file\n\x1dwVQ\x03\x05\x04\x00",
                     headers={"Content-Type": "application/vnd.rar"},
                 ),
             ),

From 2548289135c97c06defaa78ddaf61f66282c8844 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Sun, 12 Jan 2025 18:42:48 +0000
Subject: [PATCH 26/29] Use 7z instead to extract the rar file instead as
 ubuntu:22.04 has a really old version of unrar

---
 bbot/core/helpers/depsinstaller/installer.py                | 1 -
 bbot/modules/internal/unarchive.py                          | 2 +-
 bbot/test/test_step_2/module_tests/test_module_unarchive.py | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/bbot/core/helpers/depsinstaller/installer.py b/bbot/core/helpers/depsinstaller/installer.py
index 48d2f970fa..46a2a88fd9 100644
--- a/bbot/core/helpers/depsinstaller/installer.py
+++ b/bbot/core/helpers/depsinstaller/installer.py
@@ -31,7 +31,6 @@ class DepsInstaller:
         "gcc": "gcc",
         "bash": "bash",
         "which": "which",
-        "unrar": "unrar-free",
         "tar": "tar",
         # debian why are you like this
         "7z": [
diff --git a/bbot/modules/internal/unarchive.py b/bbot/modules/internal/unarchive.py
index 45fb3a23fe..633a64d4ef 100644
--- a/bbot/modules/internal/unarchive.py
+++ b/bbot/modules/internal/unarchive.py
@@ -20,7 +20,7 @@ async def setup(self):
             "bzip2": ["tar", "--overwrite", "-xvjf", "{filename}", "-C", "{extract_dir}/"],
             "xz": ["tar", "--overwrite", "-xvJf", "{filename}", "-C", "{extract_dir}/"],
             "7z": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
-            "rar": ["unrar", "x", "-o+", "-p-", "{filename}", "{extract_dir}/"],
+            "rar": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
             "lzma": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
             "tar": ["tar", "--overwrite", "-xvf", "{filename}", "-C", "{extract_dir}/"],
             "gzip": ["tar", "--overwrite", "-xvzf", "{filename}", "-C", "{extract_dir}/"],
diff --git a/bbot/test/test_step_2/module_tests/test_module_unarchive.py b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
index 98dd60ea8d..d6894a3f46 100644
--- a/bbot/test/test_step_2/module_tests/test_module_unarchive.py
+++ b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
@@ -104,7 +104,7 @@ async def setup_after_prep(self, module_test):
             module_test.set_expect_requests(
                 dict(uri="/test.rar"),
                 dict(
-                    response_data=b"Rar!\x1a\x07\x01\x003\x92\xb5\xe5\n\x01\x05\x06\x00\x05\x01\x01\x80\x80\x00\xcf\xdc\xc5 &\x02\x03\x0b\x94\x00\x04\x94\x00\xa4\x83\x02\x96\x1ai\xd0\x80\x00\x01\x08test.txt\n\x03\x13\xcf\x01\x84g\xc2\xb6\xa6\x12This is a test file\n\x1dwVQ\x03\x05\x04\x00",
+                    response_data=b"Rar!\x1a\x07\x01\x003\x92\xb5\xe5\n\x01\x05\x06\x00\x05\x01\x01\x80\x80\x00\xa2N\x8ec&\x02\x03\x0b\x93\x00\x04\x93\x00\xa4\x83\x02\xc9\x11f\x06\x80\x00\x01\x08test.txt\n\x03\x13S\x96ug\x96\xf3\x1b\x06This is a test file\x1dwVQ\x03\x05\x04\x00",
                     headers={"Content-Type": "application/vnd.rar"},
                 ),
             ),

From bad3a4435a651ce6343e7a2406ac9006cc72338c Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Mon, 13 Jan 2025 19:00:54 +0000
Subject: [PATCH 27/29] Adding 7zip plugins for fedora as without it uses 7za

---
 bbot/core/helpers/depsinstaller/installer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bbot/core/helpers/depsinstaller/installer.py b/bbot/core/helpers/depsinstaller/installer.py
index 46a2a88fd9..7deeb9893a 100644
--- a/bbot/core/helpers/depsinstaller/installer.py
+++ b/bbot/core/helpers/depsinstaller/installer.py
@@ -42,7 +42,7 @@ class DepsInstaller:
             },
             {
                 "name": "Install 7zip (Non-Debian)",
-                "package": {"name": ["p7zip"], "state": "present"},
+                "package": {"name": ["p7zip", "7zip-plugins"], "state": "present"},
                 "become": True,
                 "when": "ansible_facts['os_family'] != 'Debian'",
             },

From b21ab37745e76505654b7a07412be34de85b8a94 Mon Sep 17 00:00:00 2001
From: Dom Whewell <dom.whewell@sage.com>
Date: Mon, 13 Jan 2025 19:44:09 +0000
Subject: [PATCH 28/29] Add p7zip-plugins on fedora

---
 bbot/core/helpers/depsinstaller/installer.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/bbot/core/helpers/depsinstaller/installer.py b/bbot/core/helpers/depsinstaller/installer.py
index 7deeb9893a..ae27bee2c1 100644
--- a/bbot/core/helpers/depsinstaller/installer.py
+++ b/bbot/core/helpers/depsinstaller/installer.py
@@ -42,10 +42,16 @@ class DepsInstaller:
             },
             {
                 "name": "Install 7zip (Non-Debian)",
-                "package": {"name": ["p7zip", "7zip-plugins"], "state": "present"},
+                "package": {"name": ["p7zip"], "state": "present"},
                 "become": True,
                 "when": "ansible_facts['os_family'] != 'Debian'",
             },
+            {
+                "name": "Install p7zip-plugins (Fedora)",
+                "package": {"name": ["p7zip-plugins"], "state": "present"},
+                "become": True,
+                "when": "ansible_facts['distribution'] == 'Fedora'",
+            },
         ],
     }
 

From 7bfb7b0bbd58934fc14a712e5581166db5bb04b5 Mon Sep 17 00:00:00 2001
From: domwhewell-sage <122788350+domwhewell-sage@users.noreply.github.com>
Date: Wed, 15 Jan 2025 16:03:32 +0000
Subject: [PATCH 29/29] How did you fix the bug? I commented the code... !?

---
 bbot/modules/internal/unarchive.py            |  4 +-
 .../module_tests/test_module_unarchive.py     | 74 +++++++++----------
 2 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/bbot/modules/internal/unarchive.py b/bbot/modules/internal/unarchive.py
index 633a64d4ef..6103ce7b7c 100644
--- a/bbot/modules/internal/unarchive.py
+++ b/bbot/modules/internal/unarchive.py
@@ -20,8 +20,8 @@ async def setup(self):
             "bzip2": ["tar", "--overwrite", "-xvjf", "{filename}", "-C", "{extract_dir}/"],
             "xz": ["tar", "--overwrite", "-xvJf", "{filename}", "-C", "{extract_dir}/"],
             "7z": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
-            "rar": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
-            "lzma": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
+            # "rar": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
+            # "lzma": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
             "tar": ["tar", "--overwrite", "-xvf", "{filename}", "-C", "{extract_dir}/"],
             "gzip": ["tar", "--overwrite", "-xvzf", "{filename}", "-C", "{extract_dir}/"],
         }
diff --git a/bbot/test/test_step_2/module_tests/test_module_unarchive.py b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
index d6894a3f46..41c96c49c7 100644
--- a/bbot/test/test_step_2/module_tests/test_module_unarchive.py
+++ b/bbot/test/test_step_2/module_tests/test_module_unarchive.py
@@ -20,7 +20,7 @@ async def setup_after_prep(self, module_test):
         bz2_file = temp_path / "test.bz2"
         xz_file = temp_path / "test.xz"
         zip7_file = temp_path / "test.7z"
-        lzma_file = temp_path / "test.lzma"
+        # lzma_file = temp_path / "test.lzma"
         tar_file = temp_path / "test.tar"
         tgz_file = temp_path / "test.tgz"
         commands = [
@@ -29,7 +29,7 @@ async def setup_after_prep(self, module_test):
             ("tar", "-C", f"{temp_path}", "-cvjf", f"{bz2_file}", f"{text_file.name}"),
             ("tar", "-C", f"{temp_path}", "-cvJf", f"{xz_file}", f"{text_file.name}"),
             ("7z", "a", '-p""', "-aoa", f"{zip7_file}", f"{text_file}"),
-            ("tar", "-C", f"{temp_path}", "--lzma", "-cvf", f"{lzma_file}", f"{text_file.name}"),
+            # ("tar", "-C", f"{temp_path}", "--lzma", "-cvf", f"{lzma_file}", f"{text_file.name}"),
             ("tar", "-C", f"{temp_path}", "-cvf", f"{tar_file}", f"{text_file.name}"),
             ("tar", "-C", f"{temp_path}", "-cvzf", f"{tgz_file}", f"{text_file.name}"),
         ]
@@ -49,8 +49,6 @@ async def setup_after_prep(self, module_test):
                 <a href="/test.bz2">
                 <a href="/test.xz">
                 <a href="/test.7z">
-                <a href="/test.rar">
-                <a href="/test.lzma">
                 <a href="/test.tar">
                 <a href="/test.tgz">""",
             ),
@@ -100,24 +98,24 @@ async def setup_after_prep(self, module_test):
                 ),
             ),
         )
-        (
-            module_test.set_expect_requests(
-                dict(uri="/test.rar"),
-                dict(
-                    response_data=b"Rar!\x1a\x07\x01\x003\x92\xb5\xe5\n\x01\x05\x06\x00\x05\x01\x01\x80\x80\x00\xa2N\x8ec&\x02\x03\x0b\x93\x00\x04\x93\x00\xa4\x83\x02\xc9\x11f\x06\x80\x00\x01\x08test.txt\n\x03\x13S\x96ug\x96\xf3\x1b\x06This is a test file\x1dwVQ\x03\x05\x04\x00",
-                    headers={"Content-Type": "application/vnd.rar"},
-                ),
-            ),
-        )
-        (
-            module_test.set_expect_requests(
-                dict(uri="/test.lzma"),
-                dict(
-                    response_data=lzma_file.read_bytes(),
-                    headers={"Content-Type": "application/x-lzma"},
-                ),
-            ),
-        )
+        # (
+        #     module_test.set_expect_requests(
+        #         dict(uri="/test.rar"),
+        #         dict(
+        #             response_data=b"Rar!\x1a\x07\x01\x003\x92\xb5\xe5\n\x01\x05\x06\x00\x05\x01\x01\x80\x80\x00\xa2N\x8ec&\x02\x03\x0b\x93\x00\x04\x93\x00\xa4\x83\x02\xc9\x11f\x06\x80\x00\x01\x08test.txt\n\x03\x13S\x96ug\x96\xf3\x1b\x06This is a test file\x1dwVQ\x03\x05\x04\x00",
+        #             headers={"Content-Type": "application/vnd.rar"},
+        #         ),
+        #     ),
+        # )
+        # (
+        #     module_test.set_expect_requests(
+        #         dict(uri="/test.lzma"),
+        #         dict(
+        #             response_data=lzma_file.read_bytes(),
+        #             headers={"Content-Type": "application/x-lzma"},
+        #         ),
+        #     ),
+        # )
         (
             module_test.set_expect_requests(
                 dict(uri="/test.tar"),
@@ -191,24 +189,24 @@ def check(self, module_test, events):
         assert extract_path.is_file(), "Failed to extract the test file"
 
         # RAR
-        rar_file_event = [e for e in filesystem_events if "test.rar" in e.data["path"]]
-        assert 1 == len(rar_file_event), "No rar file found"
-        file = Path(rar_file_event[0].data["path"])
-        assert file.is_file(), f"File not found at {file}"
-        extract_event = [e for e in filesystem_events if "test_rar" in e.data["path"] and "folder" in e.tags]
-        assert 1 == len(extract_event), "Failed to extract rar"
-        extract_path = Path(extract_event[0].data["path"]) / "test.txt"
-        assert extract_path.is_file(), list(extract_path.parent.iterdir())
+        # rar_file_event = [e for e in filesystem_events if "test.rar" in e.data["path"]]
+        # assert 1 == len(rar_file_event), "No rar file found"
+        # file = Path(rar_file_event[0].data["path"])
+        # assert file.is_file(), f"File not found at {file}"
+        # extract_event = [e for e in filesystem_events if "test_rar" in e.data["path"] and "folder" in e.tags]
+        # assert 1 == len(extract_event), "Failed to extract rar"
+        # extract_path = Path(extract_event[0].data["path"]) / "test.txt"
+        # assert extract_path.is_file(), list(extract_path.parent.iterdir())
 
         # LZMA
-        lzma_file_event = [e for e in filesystem_events if "test.lzma" in e.data["path"]]
-        assert 1 == len(lzma_file_event), "No lzma file found"
-        file = Path(lzma_file_event[0].data["path"])
-        assert file.is_file(), f"File not found at {file}"
-        extract_event = [e for e in filesystem_events if "test_lzma" in e.data["path"] and "folder" in e.tags]
-        assert 1 == len(extract_event), "Failed to extract lzma"
-        extract_path = Path(extract_event[0].data["path"]) / "test.txt"
-        assert extract_path.is_file(), "Failed to extract the test file"
+        # lzma_file_event = [e for e in filesystem_events if "test.lzma" in e.data["path"]]
+        # assert 1 == len(lzma_file_event), "No lzma file found"
+        # file = Path(lzma_file_event[0].data["path"])
+        # assert file.is_file(), f"File not found at {file}"
+        # extract_event = [e for e in filesystem_events if "test_lzma" in e.data["path"] and "folder" in e.tags]
+        # assert 1 == len(extract_event), "Failed to extract lzma"
+        # extract_path = Path(extract_event[0].data["path"]) / "test.txt"
+        # assert extract_path.is_file(), "Failed to extract the test file"
 
         # TAR
         tar_file_event = [e for e in filesystem_events if "test.tar" in e.data["path"]]