From b1586a9a3d929e15fea443963e760ad271d1cfcd Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Fri, 26 Jul 2024 02:41:32 +0100 Subject: [PATCH 1/8] initial commit --- .../drakrun/lib/postprocessing/__init__.py | 10 + .../lib/postprocessing/generate_report.py | 187 ++++++++++++++++++ drakrun/requirements.txt | 1 + 3 files changed, 198 insertions(+) create mode 100644 drakrun/drakrun/lib/postprocessing/generate_report.py diff --git a/drakrun/drakrun/lib/postprocessing/__init__.py b/drakrun/drakrun/lib/postprocessing/__init__.py index fd2f01a5b..53b38469d 100644 --- a/drakrun/drakrun/lib/postprocessing/__init__.py +++ b/drakrun/drakrun/lib/postprocessing/__init__.py @@ -5,6 +5,7 @@ from .compress_ipt import compress_ipt from .crop_dumps import crop_dumps from .generate_graphs import generate_graphs +from .generate_report import build_report from .generate_wireshark_key_file import generate_wireshark_key_file from .index_logs import index_logs from .process_apimon_log import process_apimon_log @@ -44,6 +45,15 @@ class PostprocessPlugin(NamedTuple): requires=["procmon.log"], generates=["process_tree.json"], ), + PostprocessPlugin( + function=build_report, + requires=[ + "metadata.json", + "process_tree,json", + "apimon.log", + "ttps.json", + ], + ), PostprocessPlugin(function=crop_dumps, requires=["dumps"], generates=["dumps.zip"]), PostprocessPlugin(function=compress_ipt, requires=["ipt"], generates=["ipt.zip"]), PostprocessPlugin(function=index_logs, requires=[], generates=["index"]), diff --git a/drakrun/drakrun/lib/postprocessing/generate_report.py b/drakrun/drakrun/lib/postprocessing/generate_report.py new file mode 100644 index 000000000..909779b4f --- /dev/null +++ b/drakrun/drakrun/lib/postprocessing/generate_report.py @@ -0,0 +1,187 @@ +import itertools +from datetime import datetime +from pathlib import Path +from typing import Dict, Iterator, List, Optional, Union + +import orjson + + +def epoch_to_timestring(unix_time: Union[int, float, str]) -> Optional[str]: + # This method converts a unix epoch time into a formated time string. + # Example: + # Input: 1716998460 + # Return: '2024-05-29 17:01:00' + if isinstance(unix_time, str): + unix_time = float(unix_time) + + if not unix_time or unix_time == 0: + # Sometimes the time in the logs would be zero or None + return None + + return str(datetime.fromtimestamp(unix_time)) + + +def parse_metadata(metadata_file: Path) -> Dict: + # This method parses the metadata.json file + # Unix epoch timestamps are converted to printable time strings as well. + with metadata_file.open("r") as f: + metadata = orjson.loads(f.read()) + + metadata["time_started"] = epoch_to_timestring(metadata["time_started"]) + metadata["time_finished"] = epoch_to_timestring(metadata["time_finished"]) + + return metadata + + +def process_key(ppid: int, pid: int) -> str: + # This method defines the way we use to address and differentiate between processes. + # The convention used by default is ppid_pid. + return "_".join((str(ppid), str(pid))) + + +def parse_apicall(apicall: Dict) -> Dict: + # This method takes in an apimon entry and fetches the necessary information from it. + # Unix epoch times are converted to printable time strings. + return { + "TimeStamp": epoch_to_timestring(apicall["TimeStamp"]), + "CalledFrom": apicall["CalledFrom"], + "Method": apicall["Method"], + "ReturnValue": apicall["ReturnValue"], + "Argument": [arg.split("=", maxsplit=1)[1] for arg in apicall["Arguments"]], + } + + +def parse_apimon(processes: Dict, apimon_file: Path) -> None: + # This method parses each entry of the apimon.log file and appends + # it to the appropriate process in the report. + with apimon_file.open("r", errors="ignore") as f: + for line in f: + call = orjson.loads(line) + if call["Event"] == "api_called": + pkey = process_key(call["PPID"], call["PID"]) + processes[pkey]["api_calls"].append(parse_apicall(call)) + + for pkey, process in processes.items(): + grouped_api_calls = [ + list(j) + for i, j in itertools.groupby( + process["api_calls"], + key=lambda call: ( + call["CalledFrom"], + call["Method"], + call["ReturnValue"], + call["Argument"], + ), + ) + ] + api_calls = list() + for calls_group in grouped_api_calls: + api_call = dict() + api_call.update(calls_group[0] | {"Repeated": len(calls_group) - 1}) + api_calls.append(api_call) + process["api_calls"] = api_calls + + +def parse_ttps(processes: Dict, ttps_file: Path) -> None: + # This method parses the TTPs in the ttps.json file and appends + # it to the appropriate process in the report. + with ttps_file.open("r") as f: + for line in f: + ttp: Dict = orjson.loads(line) + occurrences = ttp.pop("occurrences") + for occurrence in occurrences: + pkey = process_key(occurrence["ppid"], occurrence["pid"]) + processes[pkey]["ttps"].append(ttp) + + +def parse_memdumps(processes: Dict, memdumps_file: Path) -> None: + # This method parses the memdump.log file and appends all memory dump + # information into the appropriate process in the report + with memdumps_file.open("r") as f: + for line in f: + memdump: Dict = orjson.loads(line) + pkey = process_key(memdump["PPID"], memdump["PID"]) + processes[pkey]["memdumps"].append( + { + "reason": memdump["DumpReason"], + "addr": memdump["DumpAddr"], + "size": memdump["DumpSize"], + "filename": memdump["DumpFilename"], + "count": memdump["DumpsCount"], + } + ) + + +def parse_processtree(processtree_file: Path) -> List[Dict]: + # This method extracts all the processes and their associated information + # from the process_tree.json file. + def rec(processes: List[Dict], parent=0) -> Iterator[Dict]: + # This is a helper recursive function that parses the process tree + for process in processes: + yield { + "pid": process["pid"], + "ppid": parent, + "procname": process["procname"], + "args": process["args"], + "ts_from": epoch_to_timestring(process["ts_from"]), + "ts_to": epoch_to_timestring(process["ts_to"]), + "children": [ + process_key(process["pid"], child["pid"]) + for child in process["children"] + ], + "api_calls": [], # to be filled later by parse_apimon() + "ttps": [], # to be filled later by parse_ttps() + "memdumps": [], # to be filled later by parse_memdumps() + } + yield from rec(process["children"], parent=process["pid"]) + + with processtree_file.open("r") as f: + processtree = orjson.loads(f.read()) + + return { + process_key(process["ppid"], process["pid"]): process + for process in rec(processtree) + } + + +def get_metadata(analysis_dir: Path) -> Dict: + # Currently, all metadata is contained in the metadata.json file + return parse_metadata(analysis_dir / "metadata.json") + + +def get_processes(analysis_dir: Path) -> Dict: + # generate a dictionary of indexed processes + processes = parse_processtree(analysis_dir / "process_tree.json") + # parse api calls into the indexed process dictionary + parse_apimon(processes, analysis_dir / "apimon.log") + # parse ttps into the indexed process dictionary + parse_ttps(processes, analysis_dir / "ttps.json") + # parse memory dumps log into the indexed process dictionary + parse_memdumps(processes, analysis_dir / "memdump.log") + + return processes + + +def build_report(analysis_dir: Path) -> None: + report = dict() + + report.update({"info": get_metadata(analysis_dir)}) + report.update({"processes": get_processes(analysis_dir)}) + + with (analysis_dir / "report.json").open("wb") as f: + f.write(orjson.dumps(report, option=orjson.OPT_INDENT_2)) + + +if __name__ == "__main__": + from sys import argv + + if len(argv) < 2: + print("missing analysis directory") + exit(1) + + analysis_dir = Path(argv[1]) + if not analysis_dir.exists() or not any(analysis_dir.iterdir()): + print("analysis directory is empty or non-existant") + exit(1) + + build_report(analysis_dir) diff --git a/drakrun/requirements.txt b/drakrun/requirements.txt index 39c1a843b..087dee4bb 100644 --- a/drakrun/requirements.txt +++ b/drakrun/requirements.txt @@ -16,6 +16,7 @@ regex==2020.7.14 ipython==8.10.0 malduck==4.1.0 mwdblib==3.4.1 +orjson==3.9.10 # Something went wrong in 4.3.0 package yara-python<4.3.0 mslex==1.1.0 From 2606ed324e3fc7a5e101621a7e06016cb1704e73 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Wed, 7 Aug 2024 14:42:38 +0100 Subject: [PATCH 2/8] Update drakrun/drakrun/lib/postprocessing/generate_report.py Co-authored-by: msm-cert <156842376+msm-cert@users.noreply.github.com> --- drakrun/drakrun/lib/postprocessing/generate_report.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drakrun/drakrun/lib/postprocessing/generate_report.py b/drakrun/drakrun/lib/postprocessing/generate_report.py index 909779b4f..e306c218c 100644 --- a/drakrun/drakrun/lib/postprocessing/generate_report.py +++ b/drakrun/drakrun/lib/postprocessing/generate_report.py @@ -163,8 +163,7 @@ def get_processes(analysis_dir: Path) -> Dict: def build_report(analysis_dir: Path) -> None: - report = dict() - + report = {} report.update({"info": get_metadata(analysis_dir)}) report.update({"processes": get_processes(analysis_dir)}) From 4ba5acaef9dc322632b1c2c3a9941294e2dedd63 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Wed, 7 Aug 2024 14:43:00 +0100 Subject: [PATCH 3/8] Update drakrun/drakrun/lib/postprocessing/generate_report.py Co-authored-by: msm-cert <156842376+msm-cert@users.noreply.github.com> --- drakrun/drakrun/lib/postprocessing/generate_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drakrun/drakrun/lib/postprocessing/generate_report.py b/drakrun/drakrun/lib/postprocessing/generate_report.py index e306c218c..b0caea0f3 100644 --- a/drakrun/drakrun/lib/postprocessing/generate_report.py +++ b/drakrun/drakrun/lib/postprocessing/generate_report.py @@ -36,7 +36,7 @@ def parse_metadata(metadata_file: Path) -> Dict: def process_key(ppid: int, pid: int) -> str: # This method defines the way we use to address and differentiate between processes. # The convention used by default is ppid_pid. - return "_".join((str(ppid), str(pid))) + return f"{ppid}_{pid}" def parse_apicall(apicall: Dict) -> Dict: From 60e48d847185bf136a382929617d336875f2be61 Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Wed, 7 Aug 2024 15:17:11 +0100 Subject: [PATCH 4/8] remove dependencies and handle possibility of them non-existing --- drakrun/drakrun/lib/postprocessing/__init__.py | 8 ++------ drakrun/drakrun/lib/postprocessing/generate_report.py | 9 ++++++--- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drakrun/drakrun/lib/postprocessing/__init__.py b/drakrun/drakrun/lib/postprocessing/__init__.py index 53b38469d..c4ebc67db 100644 --- a/drakrun/drakrun/lib/postprocessing/__init__.py +++ b/drakrun/drakrun/lib/postprocessing/__init__.py @@ -47,12 +47,8 @@ class PostprocessPlugin(NamedTuple): ), PostprocessPlugin( function=build_report, - requires=[ - "metadata.json", - "process_tree,json", - "apimon.log", - "ttps.json", - ], + requires=[], + generates=["report.json"], ), PostprocessPlugin(function=crop_dumps, requires=["dumps"], generates=["dumps.zip"]), PostprocessPlugin(function=compress_ipt, requires=["ipt"], generates=["ipt.zip"]), diff --git a/drakrun/drakrun/lib/postprocessing/generate_report.py b/drakrun/drakrun/lib/postprocessing/generate_report.py index 909779b4f..952458cdf 100644 --- a/drakrun/drakrun/lib/postprocessing/generate_report.py +++ b/drakrun/drakrun/lib/postprocessing/generate_report.py @@ -153,11 +153,14 @@ def get_processes(analysis_dir: Path) -> Dict: # generate a dictionary of indexed processes processes = parse_processtree(analysis_dir / "process_tree.json") # parse api calls into the indexed process dictionary - parse_apimon(processes, analysis_dir / "apimon.log") + if (analysis_dir / "apimon.log").is_file(): + parse_apimon(processes, analysis_dir / "apimon.log") # parse ttps into the indexed process dictionary - parse_ttps(processes, analysis_dir / "ttps.json") + if (analysis_dir / "ttps.json").is_file(): + parse_ttps(processes, analysis_dir / "ttps.json") # parse memory dumps log into the indexed process dictionary - parse_memdumps(processes, analysis_dir / "memdump.log") + if (analysis_dir / "memdump.log").is_file(): + parse_memdumps(processes, analysis_dir / "memdump.log") return processes From ede51be8d71c357ad204638fc07adfdca3c210ad Mon Sep 17 00:00:00 2001 From: Yacine Elhamer Date: Thu, 8 Aug 2024 04:28:13 +0100 Subject: [PATCH 5/8] fix time formatting --- drakrun/drakrun/lib/postprocessing/generate_report.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drakrun/drakrun/lib/postprocessing/generate_report.py b/drakrun/drakrun/lib/postprocessing/generate_report.py index efe09d12f..5aa3be494 100644 --- a/drakrun/drakrun/lib/postprocessing/generate_report.py +++ b/drakrun/drakrun/lib/postprocessing/generate_report.py @@ -1,5 +1,5 @@ import itertools -from datetime import datetime +from datetime import datetime, timezone from pathlib import Path from typing import Dict, Iterator, List, Optional, Union @@ -18,7 +18,8 @@ def epoch_to_timestring(unix_time: Union[int, float, str]) -> Optional[str]: # Sometimes the time in the logs would be zero or None return None - return str(datetime.fromtimestamp(unix_time)) + time = datetime.fromtimestamp(unix_time, tz=timezone.utc) + return time.isoformat() def parse_metadata(metadata_file: Path) -> Dict: From 216a8db32be2c16187a8a0c79db6c69a375aee2a Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Sat, 31 Aug 2024 14:02:45 +0100 Subject: [PATCH 6/8] Update drakrun/drakrun/lib/postprocessing/generate_report.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Paweł Srokosz --- drakrun/drakrun/lib/postprocessing/generate_report.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drakrun/drakrun/lib/postprocessing/generate_report.py b/drakrun/drakrun/lib/postprocessing/generate_report.py index 5aa3be494..10b3c3275 100644 --- a/drakrun/drakrun/lib/postprocessing/generate_report.py +++ b/drakrun/drakrun/lib/postprocessing/generate_report.py @@ -167,9 +167,10 @@ def get_processes(analysis_dir: Path) -> Dict: def build_report(analysis_dir: Path) -> None: - report = {} - report.update({"info": get_metadata(analysis_dir)}) - report.update({"processes": get_processes(analysis_dir)}) + report = { + "info": get_metadata(analysis_dir), + "processes": get_processes(analysis_dir), + } with (analysis_dir / "report.json").open("wb") as f: f.write(orjson.dumps(report, option=orjson.OPT_INDENT_2)) From d2992a7c18273032fd6a4393e2fceb0456eed847 Mon Sep 17 00:00:00 2001 From: Yacine <16624109+yelhamer@users.noreply.github.com> Date: Sat, 31 Aug 2024 14:02:53 +0100 Subject: [PATCH 7/8] Update drakrun/drakrun/lib/postprocessing/generate_report.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Paweł Srokosz --- drakrun/drakrun/lib/postprocessing/generate_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drakrun/drakrun/lib/postprocessing/generate_report.py b/drakrun/drakrun/lib/postprocessing/generate_report.py index 10b3c3275..c7ea7bd05 100644 --- a/drakrun/drakrun/lib/postprocessing/generate_report.py +++ b/drakrun/drakrun/lib/postprocessing/generate_report.py @@ -113,7 +113,7 @@ def parse_memdumps(processes: Dict, memdumps_file: Path) -> None: ) -def parse_processtree(processtree_file: Path) -> List[Dict]: +def parse_processtree(processtree_file: Path) -> Dict[str, Dict]: # This method extracts all the processes and their associated information # from the process_tree.json file. def rec(processes: List[Dict], parent=0) -> Iterator[Dict]: From bb1bfa9c39f0d4c47dc0769c9b3e4c25e234dee1 Mon Sep 17 00:00:00 2001 From: psrok1 Date: Mon, 2 Sep 2024 11:52:48 +0200 Subject: [PATCH 8/8] Simplify epoch_to_timestring --- .../drakrun/lib/postprocessing/generate_report.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drakrun/drakrun/lib/postprocessing/generate_report.py b/drakrun/drakrun/lib/postprocessing/generate_report.py index c7ea7bd05..644735ffa 100644 --- a/drakrun/drakrun/lib/postprocessing/generate_report.py +++ b/drakrun/drakrun/lib/postprocessing/generate_report.py @@ -1,20 +1,17 @@ import itertools from datetime import datetime, timezone from pathlib import Path -from typing import Dict, Iterator, List, Optional, Union +from typing import Dict, Iterator, List, Optional import orjson -def epoch_to_timestring(unix_time: Union[int, float, str]) -> Optional[str]: +def epoch_to_timestring(unix_time: Optional[float]) -> Optional[str]: # This method converts a unix epoch time into a formated time string. # Example: - # Input: 1716998460 + # Input: 1716998460.000 # Return: '2024-05-29 17:01:00' - if isinstance(unix_time, str): - unix_time = float(unix_time) - - if not unix_time or unix_time == 0: + if not unix_time: # Sometimes the time in the logs would be zero or None return None @@ -44,7 +41,7 @@ def parse_apicall(apicall: Dict) -> Dict: # This method takes in an apimon entry and fetches the necessary information from it. # Unix epoch times are converted to printable time strings. return { - "TimeStamp": epoch_to_timestring(apicall["TimeStamp"]), + "TimeStamp": epoch_to_timestring(float(apicall["TimeStamp"])), "CalledFrom": apicall["CalledFrom"], "Method": apicall["Method"], "ReturnValue": apicall["ReturnValue"],