From faacb7d55388d26119c2e78f5136d3f920d4d1e7 Mon Sep 17 00:00:00 2001 From: hirokuni-kitahara Date: Thu, 26 Oct 2023 13:50:23 +0900 Subject: [PATCH] add file object model and loading function Signed-off-by: hirokuni-kitahara --- ansible_risk_insight/finder.py | 5 +- ansible_risk_insight/keyutil.py | 8 +++ ansible_risk_insight/model_loader.py | 93 ++++++++++++++++++++++++++++ ansible_risk_insight/models.py | 32 ++++++++++ ansible_risk_insight/parser.py | 30 +++++++++ 5 files changed, 167 insertions(+), 1 deletion(-) diff --git a/ansible_risk_insight/finder.py b/ansible_risk_insight/finder.py index df8008f5..e4b6780c 100644 --- a/ansible_risk_insight/finder.py +++ b/ansible_risk_insight/finder.py @@ -556,7 +556,10 @@ def label_yml_file(yml_path: str = "", yml_body: str = "", task_num_thresh: int label = "" if not body or not data: - label = label_empty_file_by_path(yml_path) if yml_path else "others" + label_by_path = "" + if yml_path: + label_by_path = label_empty_file_by_path(yml_path) + label = label_by_path if label_by_path else "others" elif data and not isinstance(data, list): label = "others" elif could_be_playbook_detail(body, data): diff --git a/ansible_risk_insight/keyutil.py b/ansible_risk_insight/keyutil.py index 999bf4ad..b41b913a 100644 --- a/ansible_risk_insight/keyutil.py +++ b/ansible_risk_insight/keyutil.py @@ -267,3 +267,11 @@ def make_imported_taskfile_key(caller_key, path): normed_path = os.path.normpath(path) key = f"taskfile {parent}taskfile{key_delimiter}{normed_path}" return key + + +def set_file_key(obj): + global_key_prefix = make_global_key_prefix(obj.collection, obj.role) + global_key = "{} {}{}{}{}".format(obj.type, global_key_prefix, obj.type, key_delimiter, obj.defined_in) + local_key = "{} {}{}{}".format(obj.type, obj.type, key_delimiter, obj.defined_in) + obj.key = global_key + obj.local_key = local_key diff --git a/ansible_risk_insight/model_loader.py b/ansible_risk_insight/model_loader.py index 97a96931..2b3529e4 100644 --- a/ansible_risk_insight/model_loader.py +++ b/ansible_risk_insight/model_loader.py @@ -19,6 +19,7 @@ import os import re import yaml +import traceback try: # if `libyaml` is available, use C based loader for performance @@ -35,6 +36,7 @@ ExecutableType, Inventory, InventoryType, + File, LoadType, Play, Playbook, @@ -187,6 +189,8 @@ def load_repository( logger.debug("start loading inventory files") repoObj.inventories = load_inventories(repo_path, basedir=basedir) logger.debug("done ... {} inventory files loaded".format(len(repoObj.inventories))) + repoObj.files = load_files(path=repo_path, basedir=basedir, yaml_label_list=yaml_label_list, load_children=load_children) + logger.debug("done ... {} other files loaded".format(len(repoObj.files))) logger.debug("start loading installed collections") repoObj.installed_collections = load_installed_collections(installed_collections_path) @@ -304,6 +308,93 @@ def load_inventories(path, basedir=""): return inventories +# TODO: need more-detailed labels like `vars`? 
(currently use the passed one as is) +def load_file( + path, + basedir="", + label="", + body="", + error="", + read=True, + role_name="", + collection_name="", +): + fullpath = os.path.join(basedir, path) + if not os.path.exists(fullpath): + if path and os.path.exists(path): + fullpath = path + + # use passed body/error when provided or when read=False + if body or error or not read: + pass + else: + # otherwise, try reading the file + if os.path.exists(fullpath): + try: + with open(fullpath, "r") as file: + body = file.read() + except Exception: + error = traceback.format_exc() + else: + error = f"File not found: {fullpath}" + + # try reading body as a YAML string + data = None + if body: + try: + data = yaml.safe_load(body) + except Exception: + # ignore exception if any + # because possibly this file is not a YAML file + pass + + defined_in = fullpath + if basedir != "": + if defined_in.startswith(basedir): + defined_in = defined_in[len(basedir) :] + if defined_in.startswith("/"): + defined_in = defined_in[1:] + + fObj = File() + fObj.name = defined_in + fObj.body = body + fObj.data = data + fObj.error = error + fObj.label = label + fObj.defined_in = defined_in + if role_name != "": + fObj.role = role_name + if collection_name != "": + fObj.collection = collection_name + fObj.set_key() + return fObj + + +# load general files that has no task definitions +# e.g. variable files, jinja2 templates and non-ansible files +# TODO: support loading without pre-computed yaml_label_list +# TODO: support non-YAML files +def load_files(path, basedir="", yaml_label_list=None, role_name="", collection_name="", load_children=True): + if not yaml_label_list: + return [] + + files = [] + for (fpath, label, role_info) in yaml_label_list: + if not fpath: + continue + if not label: + continue + # load only `others` files + if label != "others": + continue + f = load_file(path=fpath, basedir=basedir, label=label, role_name=role_name, collection_name=collection_name) + if load_children: + files.append(f) + else: + files.append(f.defined_in) + return files + + def load_play( path, index, @@ -1798,6 +1889,8 @@ def load_object(loadObj): loadObj.taskfiles = current + obj.handlers if hasattr(obj, "modules"): loadObj.modules = obj.modules + if hasattr(obj, "files"): + loadObj.files = obj.files if target_type == LoadType.ROLE: loadObj.roles = [obj.defined_in] diff --git a/ansible_risk_insight/models.py b/ansible_risk_insight/models.py index a22dfef7..95f16423 100644 --- a/ansible_risk_insight/models.py +++ b/ansible_risk_insight/models.py @@ -36,6 +36,7 @@ set_role_key, set_task_key, set_taskfile_key, + set_file_key, set_call_object_key, get_obj_info_by_key, ) @@ -130,6 +131,7 @@ class Load(JSONSerializable): playbooks: list = field(default_factory=list) taskfiles: list = field(default_factory=list) modules: list = field(default_factory=list) + files: list = field(default_factory=list) @dataclass @@ -283,6 +285,34 @@ def __getitem__(self, i): return self.items[i] +@dataclass +class File(object): + type: str = "file" + name: str = "" + key: str = "" + local_key: str = "" + role: str = "" + collection: str = "" + + body: str = "" + data: any = None + error: str = "" + label: str = "" + defined_in: str = "" + + annotations: dict = field(default_factory=dict) + + def set_key(self): + set_file_key(self) + + def children_to_key(self): + return self + + @property + def resolver_targets(self): + return None + + @dataclass class ModuleArgument(object): name: str = "" @@ -2070,6 +2100,8 @@ class Repository(Object, 
Resolvable): inventories: list = field(default_factory=list) + files: list = field(default_factory=list) + version: str = "" annotations: dict = field(default_factory=dict) diff --git a/ansible_risk_insight/parser.py b/ansible_risk_insight/parser.py index 5e9a54f8..8ae67070 100644 --- a/ansible_risk_insight/parser.py +++ b/ansible_risk_insight/parser.py @@ -38,6 +38,7 @@ load_repository, load_role, load_taskfile, + load_file, ) from .utils import ( split_target_playbook_fullpath, @@ -120,6 +121,7 @@ def run(self, load_data=None, load_json_path="", collection_name_of_project=""): skip_playbook_format_error=self.skip_playbook_format_error, skip_task_format_error=self.skip_task_format_error, include_test_contents=ld.include_test_contents, + yaml_label_list=ld.yaml_label_list, ) except PlaybookFormatError: if not self.skip_playbook_format_error: @@ -222,6 +224,7 @@ def run(self, load_data=None, load_json_path="", collection_name_of_project=""): "taskfiles": [], "modules": [], "playbooks": [], + "files": [], } basedir = ld.path @@ -351,12 +354,35 @@ def run(self, load_data=None, load_json_path="", collection_name_of_project=""): modules.append(m) mappings["modules"].append([module_path, m.key]) + files = [] + for file_path in ld.files: + f = None + try: + label = "others" + if ld.yaml_label_list: + for (_fpath, _label, _) in ld.yaml_label_list: + if _fpath == file_path: + label = _label + f = load_file( + path=file_path, + basedir=basedir, + label=label, + role_name=role_name, + collection_name=collection_name, + ) + except Exception as e: + logger.debug(f"failed to load a file: {e}") + continue + files.append(f) + mappings["files"].append([file_path, f.key]) + logger.debug("roles: {}".format(len(roles))) logger.debug("taskfiles: {}".format(len(taskfiles))) logger.debug("modules: {}".format(len(modules))) logger.debug("playbooks: {}".format(len(playbooks))) logger.debug("plays: {}".format(len(plays))) logger.debug("tasks: {}".format(len(tasks))) + logger.debug("files: {}".format(len(files))) collections = [] projects = [] @@ -387,12 +413,15 @@ def run(self, load_data=None, load_json_path="", collection_name_of_project=""): plays = [p.children_to_key() for p in plays] if len(tasks) > 0: tasks = [t.children_to_key() for t in tasks] + if len(files) > 0: + files = [f.children_to_key() for f in files] # save mappings ld.roles = mappings["roles"] ld.taskfiles = mappings["taskfiles"] ld.playbooks = mappings["playbooks"] ld.modules = mappings["modules"] + ld.files = mappings["files"] definitions = { "collections": collections, @@ -403,6 +432,7 @@ def run(self, load_data=None, load_json_path="", collection_name_of_project=""): "playbooks": playbooks, "plays": plays, "tasks": tasks, + "files": files, } return definitions, ld
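
Usage note (not part of the patch): a minimal sketch of how the new load_file()
introduced in model_loader.py could be exercised once this change is applied. It
assumes the package is importable as `ansible_risk_insight`, and the repository
path, role name and vars file below are hypothetical stand-ins.

    from ansible_risk_insight.model_loader import load_file

    # Load a single non-task YAML file (e.g. a vars file) relative to a base dir.
    f = load_file(
        path="roles/common/vars/main.yml",   # hypothetical file in the target repo
        basedir="/path/to/target/repo",      # hypothetical checkout location
        label="others",
        role_name="common",
    )

    print(f.defined_in)   # path made relative to basedir
    print(f.key)          # "file <prefix>file<key_delimiter><defined_in>" per set_file_key()
    print(f.error or "loaded OK")
    print(type(f.data))   # parsed YAML data, or None if the body is not YAML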
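
Usage note (not part of the patch): a sketch of driving load_files() with a
pre-computed yaml_label_list, mirroring how parser.py passes ld.yaml_label_list.
Each tuple is (path, label, role_info); only entries labelled "others" become
File objects. The paths below are hypothetical examples.

    from ansible_risk_insight.model_loader import load_files

    yaml_label_list = [
        ("roles/common/tasks/main.yml", "taskfile", None),  # skipped: not "others"
        ("roles/common/vars/main.yml", "others", None),     # loaded as a File
        ("group_vars/all.yml", "others", None),             # loaded as a File
    ]

    files = load_files(
        path="/path/to/target/repo",
        basedir="/path/to/target/repo",
        yaml_label_list=yaml_label_list,
        load_children=True,   # False returns only the relative paths (defined_in)
    )

    for f in files:
        print(f.label, f.defined_in, f.key)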