Skip to content

Commit

Permalink
Rename to unarchive, move jar exclusions into module and restore helper
Browse files: browse the repository at this point in the history
  • Loading branch information
domwhewell-sage committed Dec 15, 2024
1 parent bf8a1b3 commit 7db38fd
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 63 deletions.
2 changes: 2 additions & 0 deletions bbot/core/helpers/libmagic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ def get_compression(mime_type):
"application/fictionbook2+zip": "zip", # FictionBook 2.0 (Zip)
"application/fictionbook3+zip": "zip", # FictionBook 3.0 (Zip)
"application/gzip": "gzip", # Gzip compressed file
"application/java-archive": "zip", # Java Archive (JAR)
"application/pak": "pak", # PAK archive
"application/vnd.android.package-archive": "zip", # Android package (APK)
"application/vnd.comicbook-rar": "rar", # Comic book archive (RAR)
"application/vnd.comicbook+zip": "zip", # Comic book archive (Zip)
"application/vnd.ms-cab-compressed": "cab", # Microsoft Cabinet archive
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
from bbot.core.helpers.libmagic import get_magic_info, get_compression


class extract(BaseInternalModule):
class unarchive(BaseInternalModule):
watched_events = ["FILESYSTEM"]
produced_events = ["FILESYSTEM"]
flags = ["passive"]
flags = ["passive", "safe"]
meta = {
"description": "Extract different types of files into folders on the filesystem",
"created_date": "2024-12-08",
Expand All @@ -15,6 +15,7 @@ class extract(BaseInternalModule):
deps_apt = ["7zip", "tar", "rar", "unrar", "gunzip"]

async def setup(self):
self.ignore_compressions = ["application/java-archive", "application/vnd.android.package-archive"]
self.compression_methods = {
"zip": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
"bzip2": ["tar", "--overwrite", "-xvjf", "{filename}", "-C", "{extract_dir}/"],
Expand All @@ -29,6 +30,8 @@ async def setup(self):

async def filter_event(self, event):
if "file" in event.tags:
if event.data["magic_mime_type"] in self.ignore_compressions:
return False, f"Ignoring file type: {event.data['magic_mime_type']}, {event.data['path']}"
if not event.data["compression"] in self.compression_methods:
return False, f"Extract unable to handle file type: {event.data['compression']}, {event.data['path']}"
else:
Expand Down
6 changes: 3 additions & 3 deletions bbot/test/test_step_1/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,17 +326,17 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config):
monkeypatch.setattr("sys.argv", ["bbot", "-y"])
result = await cli._main()
assert result is True
assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,extract,speculate)" in caplog.text
assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,unarchive,speculate)" in caplog.text
caplog.clear()
monkeypatch.setattr("sys.argv", ["bbot", "-em", "excavate", "speculate", "-y"])
result = await cli._main()
assert result is True
assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,extract)" in caplog.text
assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,unarchive)" in caplog.text
caplog.clear()
monkeypatch.setattr("sys.argv", ["bbot", "-c", "speculate=false", "-y"])
result = await cli._main()
assert result is True
assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,extract)" in caplog.text
assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,unarchive)" in caplog.text

# custom target type
out, err = capsys.readouterr()
Expand Down
4 changes: 2 additions & 2 deletions bbot/test/test_step_1/test_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ def test_preset_module_resolution(clean_default_config):
assert set(preset.internal_modules) == {
"aggregate",
"excavate",
"extract",
"unarchive",
"speculate",
"cloudcheck",
"dnsresolve",
Expand Down Expand Up @@ -560,7 +560,7 @@ def test_preset_module_resolution(clean_default_config):
"dnsresolve",
"aggregate",
"excavate",
"extract",
"unarchive",
"txt",
"httpx",
"csv",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
from .base import ModuleTestBase


class TestExtract(ModuleTestBase):
class TestUnarchive(ModuleTestBase):
targets = ["http://127.0.0.1:8888"]
modules_overrides = ["filedownload", "httpx", "excavate", "speculate", "extract"]
modules_overrides = ["filedownload", "httpx", "excavate", "speculate", "unarchive"]
temp_path = Path("/tmp/.bbot_test")

# Create a text file to compress
Expand Down Expand Up @@ -52,69 +52,87 @@ async def setup_after_prep(self, module_test):
<a href="/test.tgz">""",
),
)
module_test.set_expect_requests(
dict(uri="/test.zip"),
dict(
response_data=self.zip_file.read_bytes(),
headers={"Content-Type": "application/zip"},
(
module_test.set_expect_requests(
dict(uri="/test.zip"),
dict(
response_data=self.zip_file.read_bytes(),
headers={"Content-Type": "application/zip"},
),
),
),
module_test.set_expect_requests(
dict(uri="/test-zip.zip"),
dict(
response_data=self.zip_zip_file.read_bytes(),
headers={"Content-Type": "application/zip"},
)
(
module_test.set_expect_requests(
dict(uri="/test-zip.zip"),
dict(
response_data=self.zip_zip_file.read_bytes(),
headers={"Content-Type": "application/zip"},
),
),
),
module_test.set_expect_requests(
dict(uri="/test.bz2"),
dict(
response_data=self.bz2_file.read_bytes(),
headers={"Content-Type": "application/x-bzip2"},
)
(
module_test.set_expect_requests(
dict(uri="/test.bz2"),
dict(
response_data=self.bz2_file.read_bytes(),
headers={"Content-Type": "application/x-bzip2"},
),
),
),
module_test.set_expect_requests(
dict(uri="/test.xz"),
dict(
response_data=self.xz_file.read_bytes(),
headers={"Content-Type": "application/x-xz"},
)
(
module_test.set_expect_requests(
dict(uri="/test.xz"),
dict(
response_data=self.xz_file.read_bytes(),
headers={"Content-Type": "application/x-xz"},
),
),
),
module_test.set_expect_requests(
dict(uri="/test.7z"),
dict(
response_data=self.zip7_file.read_bytes(),
headers={"Content-Type": "application/x-7z-compressed"},
)
(
module_test.set_expect_requests(
dict(uri="/test.7z"),
dict(
response_data=self.zip7_file.read_bytes(),
headers={"Content-Type": "application/x-7z-compressed"},
),
),
),
module_test.set_expect_requests(
dict(uri="/test.rar"),
dict(
response_data=self.zip7_file.read_bytes(),
headers={"Content-Type": "application/vnd.rar"},
)
(
module_test.set_expect_requests(
dict(uri="/test.rar"),
dict(
response_data=self.zip7_file.read_bytes(),
headers={"Content-Type": "application/vnd.rar"},
),
),
),
module_test.set_expect_requests(
dict(uri="/test.lzma"),
dict(
response_data=self.lzma_file.read_bytes(),
headers={"Content-Type": "application/x-lzma"},
)
(
module_test.set_expect_requests(
dict(uri="/test.lzma"),
dict(
response_data=self.lzma_file.read_bytes(),
headers={"Content-Type": "application/x-lzma"},
),
),
),
module_test.set_expect_requests(
dict(uri="/test.tar"),
dict(
response_data=self.tar_file.read_bytes(),
headers={"Content-Type": "application/x-tar"},
)
(
module_test.set_expect_requests(
dict(uri="/test.tar"),
dict(
response_data=self.tar_file.read_bytes(),
headers={"Content-Type": "application/x-tar"},
),
),
),
module_test.set_expect_requests(
dict(uri="/test.tgz"),
dict(
response_data=self.tgz_file.read_bytes(),
headers={"Content-Type": "application/x-tgz"},
)
(
module_test.set_expect_requests(
dict(uri="/test.tgz"),
dict(
response_data=self.tgz_file.read_bytes(),
headers={"Content-Type": "application/x-tgz"},
),
),
),
)

def check(self, module_test, events):
filesystem_events = [e for e in events if e.type == "FILESYSTEM"]
Expand Down

0 comments on commit 7db38fd

Please sign in to comment.