From 77d4824f6b08ce08464423d8fc0aa6a5ad5885e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Thu, 1 Feb 2024 18:09:44 +0100 Subject: [PATCH 1/9] Correctly handle dumps with empty configs --- karton/config_extractor/config_extractor.py | 100 ++++++++++++-------- 1 file changed, 58 insertions(+), 42 deletions(-) diff --git a/karton/config_extractor/config_extractor.py b/karton/config_extractor/config_extractor.py index 3de8602..8c176bb 100644 --- a/karton/config_extractor/config_extractor.py +++ b/karton/config_extractor/config_extractor.py @@ -6,7 +6,7 @@ import os from collections import defaultdict, namedtuple from pathlib import Path -from typing import Any, DefaultDict, Dict, List, Optional +from typing import Any, DefaultDict, Dict, List, Optional, Tuple from karton.core import Config, Karton, Resource, Task from karton.core.resource import ResourceBase @@ -139,17 +139,13 @@ def __init__(self, config: Config) -> None: "config-extractor", "result_attributes", fallback={} ) - def report_config( - self, - task: Task, - config: Dict[str, Any], - sample: ResourceBase, - parent: Optional[ResourceBase] = None, - ) -> None: + def preprocess_config(self, config: Dict[str, Any], parent: Any) -> Tuple[Dict[str, any], List[Task]]: legacy_config = dict(config) legacy_config["type"] = config["family"] del legacy_config["family"] + karton_tasks = [] + # This allows us to spawn karton tasks for special config handling if "store-in-karton" in legacy_config: self.log.info("Karton tasks found in config, sending") @@ -157,19 +153,20 @@ def report_config( for karton_task in legacy_config["store-in-karton"]: task_data = karton_task["task"] payload_data = karton_task["payload"] - payload_data["parent"] = parent or sample - - task = Task(headers=task_data, payload=payload_data) - self.send_task(task) - self.log.info("Sending ripped task %s", task.uid) + karton_tasks.append(Task(headers=task_data, payload=payload_data)) del legacy_config["store-in-karton"] + return legacy_config, karton_tasks - if len(legacy_config.items()) == 1: - self.log.info("Final config is empty, not sending it to the reporter") - return + def report_config( + self, + task: Task, + config: Dict[str, Any], + sample: ResourceBase, + parent: Optional[ResourceBase] = None, + ) -> None: + dhash = config_dhash(config) - dhash = config_dhash(legacy_config) family = config["family"] task = Task( { @@ -179,7 +176,7 @@ def report_config( "quality": task.headers.get("quality", "high"), }, payload={ - "config": legacy_config, + "config": config, "executed_sample": sample, "dhash": dhash, "parent": parent or sample, @@ -212,14 +209,20 @@ def analyze_sample(self, task: Task, sample: ResourceBase) -> None: extractor = create_extractor(self) with sample.download_temporary_file() as temp: # type: ignore extractor.push_file(temp.name) - configs = extractor.config - if configs: - config = configs[0] - self.log.info("Got config: {}".format(json.dumps(config))) - self.report_config(task, config, sample) - else: - self.log.info("Failed to get config") + for config in extractor.config: + legacy_config, karton_tasks = self.preprocess_config(config) + + if len(legacy_config.items()) > 1: + self.log.info("Got config: {}".format(json.dumps(config))) + self.report_config(task, config, sample) + + for child_task in karton_tasks: + child_task.payload["parent"] = sample + self.send_task(child_task) + self.log.info("Sending ripped task %s", task.uid) + + self.log.info("Finished processing sample") def analyze_dumps( self, task: Task, sample: ResourceBase, dump_infos: List[DumpInfo] @@ -273,23 +276,36 @@ def analyze_dumps( for family, config in extractor.configs.items(): dump_basename, dump_data = dump_candidates[family] self.log.info("* (%s) %s => %s", family, dump_basename, json.dumps(config)) - parent = Resource(name=dump_basename, content=dump_data) - task = Task( - { - "type": "sample", - "stage": "analyzed", - "kind": "dump", - "platform": "win32", - "extension": "exe", - }, - payload={ - "sample": parent, - "parent": sample, - "tags": ["dump:win32:exe"], - }, - ) - self.send_task(task) - self.report_config(task, config, sample, parent=parent) + + legacy_config, karton_tasks = self.preprocess_config(config) + karton_task_parent = sample + + if len(legacy_config.items()) > 1: + parent = Resource(name=dump_basename, content=dump_data) + karton_task_parent = parent + task = Task( + { + "type": "sample", + "stage": "analyzed", + "kind": "dump", + "platform": "win32", + "extension": "exe", + }, + payload={ + "sample": parent, + "parent": sample, + "tags": ["dump:win32:exe"], + }, + ) + self.send_task(task) + self.report_config(task, config, sample, parent=parent) + else: + self.log.info("Final config is empty, not sending it to the reporter") + + for child_task in karton_tasks: + child_task.payload["parent"] = karton_task_parent + self.send_task(child_task) + self.log.info("Sending ripped task %s", task.uid) self.log.info("done analysing, results: {}".format(json.dumps(results))) From 8e2bb14dfb88f75295813234330b4e2ef058a235 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Thu, 1 Feb 2024 18:12:04 +0100 Subject: [PATCH 2/9] Black format --- karton/config_extractor/config_extractor.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/karton/config_extractor/config_extractor.py b/karton/config_extractor/config_extractor.py index 8c176bb..9309dfd 100644 --- a/karton/config_extractor/config_extractor.py +++ b/karton/config_extractor/config_extractor.py @@ -139,7 +139,9 @@ def __init__(self, config: Config) -> None: "config-extractor", "result_attributes", fallback={} ) - def preprocess_config(self, config: Dict[str, Any], parent: Any) -> Tuple[Dict[str, any], List[Task]]: + def preprocess_config( + self, config: Dict[str, Any], parent: Any + ) -> Tuple[Dict[str, any], List[Task]]: legacy_config = dict(config) legacy_config["type"] = config["family"] del legacy_config["family"] From 66e5457daea02a9300cf140f2f444b8c0c6ef8b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Thu, 1 Feb 2024 18:14:22 +0100 Subject: [PATCH 3/9] Fix types --- karton/config_extractor/config_extractor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/karton/config_extractor/config_extractor.py b/karton/config_extractor/config_extractor.py index 9309dfd..e0e0883 100644 --- a/karton/config_extractor/config_extractor.py +++ b/karton/config_extractor/config_extractor.py @@ -140,8 +140,8 @@ def __init__(self, config: Config) -> None: ) def preprocess_config( - self, config: Dict[str, Any], parent: Any - ) -> Tuple[Dict[str, any], List[Task]]: + self, config: Dict[str, Any] + ) -> Tuple[Dict[str, Any], List[Task]]: legacy_config = dict(config) legacy_config["type"] = config["family"] del legacy_config["family"] From 2c8faab7ebe4b26a5af2afd1714532f18d0dda6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Thu, 1 Feb 2024 18:56:16 +0100 Subject: [PATCH 4/9] Fix typos --- karton/config_extractor/config_extractor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/karton/config_extractor/config_extractor.py b/karton/config_extractor/config_extractor.py index e0e0883..fd4a1d0 100644 --- a/karton/config_extractor/config_extractor.py +++ b/karton/config_extractor/config_extractor.py @@ -216,8 +216,8 @@ def analyze_sample(self, task: Task, sample: ResourceBase) -> None: legacy_config, karton_tasks = self.preprocess_config(config) if len(legacy_config.items()) > 1: - self.log.info("Got config: {}".format(json.dumps(config))) - self.report_config(task, config, sample) + self.log.info("Got config: {}".format(json.dumps(legacy_config))) + self.report_config(task, legacy_config, sample) for child_task in karton_tasks: child_task.payload["parent"] = sample @@ -300,7 +300,7 @@ def analyze_dumps( }, ) self.send_task(task) - self.report_config(task, config, sample, parent=parent) + self.report_config(task, legacy_config, sample, parent=parent) else: self.log.info("Final config is empty, not sending it to the reporter") From fae8ba8818023955d9f396f117771c246aafaf15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Tue, 6 Feb 2024 18:50:02 +0100 Subject: [PATCH 5/9] Update karton/config_extractor/config_extractor.py Co-authored-by: msm-cert <156842376+msm-cert@users.noreply.github.com> --- karton/config_extractor/config_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karton/config_extractor/config_extractor.py b/karton/config_extractor/config_extractor.py index fd4a1d0..b5e97d1 100644 --- a/karton/config_extractor/config_extractor.py +++ b/karton/config_extractor/config_extractor.py @@ -215,7 +215,7 @@ def analyze_sample(self, task: Task, sample: ResourceBase) -> None: for config in extractor.config: legacy_config, karton_tasks = self.preprocess_config(config) - if len(legacy_config.items()) > 1: + if len(legacy_config) > 1: self.log.info("Got config: {}".format(json.dumps(legacy_config))) self.report_config(task, legacy_config, sample) From bfc25acad61ad4d55ae89fdfe31a18c30afcf4be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Tue, 6 Feb 2024 19:22:45 +0100 Subject: [PATCH 6/9] Fix family typo --- karton/config_extractor/config_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karton/config_extractor/config_extractor.py b/karton/config_extractor/config_extractor.py index b5e97d1..4e3234c 100644 --- a/karton/config_extractor/config_extractor.py +++ b/karton/config_extractor/config_extractor.py @@ -169,7 +169,7 @@ def report_config( ) -> None: dhash = config_dhash(config) - family = config["family"] + family = config["type"] task = Task( { "type": "config", From 59ebb0d9ecb993dffc8d77bcaa02995e1443d370 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Tue, 6 Feb 2024 19:23:45 +0100 Subject: [PATCH 7/9] Fix logging format --- karton/config_extractor/config_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karton/config_extractor/config_extractor.py b/karton/config_extractor/config_extractor.py index 4e3234c..3b130aa 100644 --- a/karton/config_extractor/config_extractor.py +++ b/karton/config_extractor/config_extractor.py @@ -216,7 +216,7 @@ def analyze_sample(self, task: Task, sample: ResourceBase) -> None: legacy_config, karton_tasks = self.preprocess_config(config) if len(legacy_config) > 1: - self.log.info("Got config: {}".format(json.dumps(legacy_config))) + self.log.info("Got config: %s", json.dumps(config)) self.report_config(task, legacy_config, sample) for child_task in karton_tasks: From 38bcb2c944300114f61bb03490f1461222143c73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Tue, 6 Feb 2024 19:30:52 +0100 Subject: [PATCH 8/9] Fix logging message --- karton/config_extractor/config_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karton/config_extractor/config_extractor.py b/karton/config_extractor/config_extractor.py index 3b130aa..2df380d 100644 --- a/karton/config_extractor/config_extractor.py +++ b/karton/config_extractor/config_extractor.py @@ -216,7 +216,7 @@ def analyze_sample(self, task: Task, sample: ResourceBase) -> None: legacy_config, karton_tasks = self.preprocess_config(config) if len(legacy_config) > 1: - self.log.info("Got config: %s", json.dumps(config)) + self.log.info("Got config: %s", json.dumps(legacy_config)) self.report_config(task, legacy_config, sample) for child_task in karton_tasks: From 6a5e2f3724dc6c8db93198cb50851f1bb7606eef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Praszmo?= Date: Wed, 7 Feb 2024 10:05:53 +0100 Subject: [PATCH 9/9] Apply suggestions from code review --- karton/config_extractor/config_extractor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/karton/config_extractor/config_extractor.py b/karton/config_extractor/config_extractor.py index 2df380d..5ed268a 100644 --- a/karton/config_extractor/config_extractor.py +++ b/karton/config_extractor/config_extractor.py @@ -282,7 +282,7 @@ def analyze_dumps( legacy_config, karton_tasks = self.preprocess_config(config) karton_task_parent = sample - if len(legacy_config.items()) > 1: + if len(legacy_config) > 1: parent = Resource(name=dump_basename, content=dump_data) karton_task_parent = parent task = Task(