Skip to content

Commit

Permalink
Merge pull request #1918 from domwhewell-sage/extract_internal_module
Browse files Browse the repository at this point in the history
New internal module "unarchive"
  • Loading branch information
TheTechromancer authored Jan 15, 2025
2 parents 13e4b10 + 7bfb7b0 commit 12e8aee
Show file tree
Hide file tree
Showing 7 changed files with 334 additions and 7 deletions.
7 changes: 6 additions & 1 deletion bbot/core/helpers/depsinstaller/installer.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ class DepsInstaller:
"gcc": "gcc",
"bash": "bash",
"which": "which",
"unrar": "unrar-free",
"tar": "tar",
# debian why are you like this
"7z": [
Expand All @@ -47,6 +46,12 @@ class DepsInstaller:
"become": True,
"when": "ansible_facts['os_family'] != 'Debian'",
},
{
"name": "Install p7zip-plugins (Fedora)",
"package": {"name": ["p7zip-plugins"], "state": "present"},
"become": True,
"when": "ansible_facts['distribution'] == 'Fedora'",
},
],
}

Expand Down
6 changes: 6 additions & 0 deletions bbot/modules/filedownload.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class filedownload(BaseModule):
"swp", # Swap File (temporary file, often Vim)
"sxw", # OpenOffice.org Writer document
"tar.gz", # Gzip-Compressed Tar Archive
"tgz", # Gzip-Compressed Tar Archive
"tar", # Tar Archive
"txt", # Plain Text Document
"vbs", # Visual Basic Script
Expand All @@ -76,6 +77,11 @@ class filedownload(BaseModule):
"yaml", # YAML Ain't Markup Language
"yml", # YAML Ain't Markup Language
"zip", # Zip Archive
"lzma", # LZMA Compressed File
"rar", # RAR Compressed File
"7z", # 7-Zip Compressed File
"xz", # XZ Compressed File
"bz2", # Bzip2 Compressed File
],
"max_filesize": "10MB",
"base_64_encoded_file": "false",
Expand Down
79 changes: 79 additions & 0 deletions bbot/modules/internal/unarchive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import shutil
from pathlib import Path

from bbot.modules.internal.base import BaseInternalModule
from bbot.core.helpers.libmagic import get_magic_info, get_compression


class unarchive(BaseInternalModule):
    """Internal module that unpacks archive files referenced by FILESYSTEM events.

    For each accepted file event the archive is extracted into a sibling
    directory, and a new FILESYSTEM event tagged ``unarchived-folder`` is
    emitted for that directory. Archives sitting directly inside an extracted
    directory are unpacked in turn.
    """

    watched_events = ["FILESYSTEM"]
    produced_events = ["FILESYSTEM"]
    flags = ["passive", "safe"]
    meta = {
        "description": "Extract different types of files into folders on the filesystem",
        "created_date": "2024-12-08",
        "author": "@domwhewell-sage",
    }

    async def setup(self):
        """Define which MIME types to ignore and how each compression format is extracted.

        Returns:
            True, unconditionally.
        """
        # Archive-like MIME types that are deliberately never unpacked.
        self.ignore_compressions = ["application/java-archive", "application/vnd.android.package-archive"]
        # Compression name -> argv template. "{filename}" and "{extract_dir}"
        # are filled in with str.format() by extract_file().
        self.compression_methods = {
            "zip": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
            "bzip2": ["tar", "--overwrite", "-xvjf", "{filename}", "-C", "{extract_dir}/"],
            "xz": ["tar", "--overwrite", "-xvJf", "{filename}", "-C", "{extract_dir}/"],
            "7z": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
            # NOTE(review): rar and lzma handlers are disabled; the reason is not visible here.
            # "rar": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
            # "lzma": ["7z", "x", '-p""', "-aoa", "{filename}", "-o{extract_dir}/"],
            "tar": ["tar", "--overwrite", "-xvf", "{filename}", "-C", "{extract_dir}/"],
            "gzip": ["tar", "--overwrite", "-xvzf", "{filename}", "-C", "{extract_dir}/"],
        }
        return True

    async def filter_event(self, event):
        """Accept only file events whose compression format has an extraction command.

        Args:
            event: Event exposing ``tags`` and a ``data`` mapping with
                ``path``, ``magic_mime_type`` and, for compressed files,
                ``compression``.

        Returns:
            True when the event should be handled, otherwise a
            ``(False, reason)`` tuple.
        """
        if "file" not in event.tags:
            return False, "Event is not a file"

        if event.data["magic_mime_type"] in self.ignore_compressions:
            return False, f"Ignoring file type: {event.data['magic_mime_type']}, {event.data['path']}"

        if "compression" not in event.data:
            return False, f"Event is not a compressed file: {event.data['path']}"

        if event.data["compression"] not in self.compression_methods:
            return False, f"Extract unable to handle file type: {event.data['compression']}, {event.data['path']}"

        return True

    async def handle_event(self, event):
        """Extract the archive behind ``event`` and emit an event for the resulting folder.

        The output directory sits next to the archive and is named after it
        with every "." replaced by "_" (e.g. ``a.tar.gz`` -> ``a_tar_gz``).
        When extraction fails, whatever was written to that directory is
        removed and no event is emitted.

        Args:
            event: The accepted FILESYSTEM event; ``event.data["path"]`` is
                the archive to extract.
        """
        path = Path(event.data["path"])
        output_dir = path.parent / path.name.replace(".", "_")

        # Use the appropriate extraction method based on the file type
        self.info(f"Extracting {path} to {output_dir}")
        success = await self.extract_file(path, output_dir)

        # If the extraction was successful, emit the event
        if success:
            await self.emit_event(
                {"path": str(output_dir)},
                "FILESYSTEM",
                tags=["folder", "unarchived-folder"],
                parent=event,
                context=f'extracted "{path}" to: {output_dir}',
            )
        else:
            # Path.rmdir() raises when the directory was never created or when
            # a failed extraction left partial output behind; rmtree copes
            # with both so that cleanup itself cannot crash the handler.
            shutil.rmtree(output_dir, ignore_errors=True)

    async def extract_file(self, path, output_dir):
        """Extract ``path`` into ``output_dir``, then unpack any archives found inside.

        The compression format is detected from the file's content via
        ``get_magic_info`` / ``get_compression`` rather than from its name.

        Args:
            path: Path of the file to extract.
            output_dir: Path of the directory to extract into; created if missing.

        Returns:
            True if the extraction command and the scan of its output
            completed, False if no extraction command exists for the detected
            format or if an error was raised along the way. Failures of nested
            archives are logged by the nested call and do not affect the
            result for ``path``.
        """
        _extension, mime_type, _description, _confidence = get_magic_info(path)
        compression_format = get_compression(mime_type)
        cmd_list = self.compression_methods.get(compression_format)
        if not cmd_list:
            # Nothing was extracted, so report failure explicitly instead of
            # falling off the end of the function.
            return False

        if not output_dir.exists():
            self.helpers.mkdir(output_dir)
        command = [s.format(filename=path, extract_dir=output_dir) for s in cmd_list]
        try:
            await self.run_process(command, check=True)
            # Snapshot the listing first: nested extractions create new
            # directories inside output_dir while we are walking it.
            for item in list(output_dir.iterdir()):
                if item.is_file():
                    await self.extract_file(item, output_dir / item.stem)
        except Exception as e:
            self.warning(f"Error extracting {path}. Error: {e}")
            return False
        return True
4 changes: 2 additions & 2 deletions bbot/modules/trufflehog.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ async def filter_event(self, event):
else:
return False, "Deleted forks is not enabled"
else:
if "parsed-folder" in event.tags:
return False, "Not accepting parsed-folder events"
if "unarchived-folder" in event.tags:
return False, "Not accepting unarchived-folder events"
return True

async def handle_event(self, event):
Expand Down
6 changes: 3 additions & 3 deletions bbot/test/test_step_1/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,17 +342,17 @@ async def test_cli_args(monkeypatch, caplog, capsys, clean_default_config):
monkeypatch.setattr("sys.argv", ["bbot", "-y"])
result = await cli._main()
assert result is True
assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,speculate)" in caplog.text
assert "Loaded 6/6 internal modules (aggregate,cloudcheck,dnsresolve,excavate,speculate,unarchive)" in caplog.text
caplog.clear()
monkeypatch.setattr("sys.argv", ["bbot", "-em", "excavate", "speculate", "-y"])
result = await cli._main()
assert result is True
assert "Loaded 3/3 internal modules (aggregate,cloudcheck,dnsresolve)" in caplog.text
assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,unarchive)" in caplog.text
caplog.clear()
monkeypatch.setattr("sys.argv", ["bbot", "-c", "speculate=false", "-y"])
result = await cli._main()
assert result is True
assert "Loaded 4/4 internal modules (aggregate,cloudcheck,dnsresolve,excavate)" in caplog.text
assert "Loaded 5/5 internal modules (aggregate,cloudcheck,dnsresolve,excavate,unarchive)" in caplog.text

# custom target type
out, err = capsys.readouterr()
Expand Down
10 changes: 9 additions & 1 deletion bbot/test/test_step_1/test_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,14 @@ def test_preset_module_resolution(clean_default_config):
# make sure we have the expected defaults
assert not preset.scan_modules
assert set(preset.output_modules) == {"python", "csv", "txt", "json"}
assert set(preset.internal_modules) == {"aggregate", "excavate", "speculate", "cloudcheck", "dnsresolve"}
assert set(preset.internal_modules) == {
"aggregate",
"excavate",
"unarchive",
"speculate",
"cloudcheck",
"dnsresolve",
}
assert preset.modules == set(preset.output_modules).union(set(preset.internal_modules))

# make sure dependency resolution works as expected
Expand Down Expand Up @@ -553,6 +560,7 @@ def test_preset_module_resolution(clean_default_config):
"dnsresolve",
"aggregate",
"excavate",
"unarchive",
"txt",
"httpx",
"csv",
Expand Down
Loading

0 comments on commit 12e8aee

Please sign in to comment.