From d2ed516a3bb5f5daa48a77f69d4f6657d2532628 Mon Sep 17 00:00:00 2001 From: Hirokuni Kitahara Date: Mon, 2 Sep 2024 11:30:58 +0900 Subject: [PATCH] fix issue around role/taskfile detection (#255) Signed-off-by: hirokuni-kitahara --- ansible_risk_insight/cli/__init__.py | 39 +++++++------- ansible_risk_insight/finder.py | 35 ++++++++----- ansible_risk_insight/model_loader.py | 62 +++++++++++++++++------ ansible_risk_insight/parser.py | 2 +- ansible_risk_insight/ram_generator.py | 2 +- ansible_risk_insight/rules/sample_rule.py | 2 +- ansible_risk_insight/tree.py | 2 +- ansible_risk_insight/yaml_utils.py | 8 +-- 8 files changed, 93 insertions(+), 59 deletions(-) diff --git a/ansible_risk_insight/cli/__init__.py b/ansible_risk_insight/cli/__init__.py index 599c71f1..f0926aee 100644 --- a/ansible_risk_insight/cli/__init__.py +++ b/ansible_risk_insight/cli/__init__.py @@ -26,7 +26,7 @@ get_role_metadata, split_name_and_version, ) -from ..finder import list_scan_target, update_the_yaml_target +from ..finder import list_scan_target, update_the_yaml_target, get_yml_list import ansible_risk_insight.logger as logger @@ -72,9 +72,7 @@ def __init__(self): help="if true, do scanning per playbook, role or taskfile (this reduces memory usage while scanning)", ) parser.add_argument( - "--fix", - action="store_true", - help="if true, fix the scanned playbook after performing the inpline replace with ARI suggestions" + "--fix", action="store_true", help="if true, fix the scanned playbook after performing the inpline replace with ARI suggestions" ) parser.add_argument( "--task-num-threshold", @@ -230,28 +228,28 @@ def run(self): logger.debug("ARI suggestion file path: %s", ari_suggestion_file_path) with open(ari_suggestion_file_path) as f: ari_suggestion_data = json.load(f) - targets = ari_suggestion_data['targets'] + targets = ari_suggestion_data["targets"] for i in reversed(range(len(targets))): logger.debug("Nodes dir number: %s", i) - nodes = targets[i]['nodes'] + nodes = targets[i]["nodes"] line_number_list = [] mutated_yaml_list = [] - target_file_path = '' - temp_file_path = '' + target_file_path = "" + temp_file_path = "" for j in range(1, len(nodes)): - node_rules = nodes[j]['rules'] + node_rules = nodes[j]["rules"] for k in reversed(range(len(node_rules))): # loop through from rule 11, as that has the mutation w007_rule = node_rules[k] - if (w007_rule['rule']['rule_id']).lower() == 'w007': - if not w007_rule.get('verdict') and w007_rule: + if (w007_rule["rule"]["rule_id"]).lower() == "w007": + if not w007_rule.get("verdict") and w007_rule: break - mutated_yaml = w007_rule['detail']['mutated_yaml'] - if mutated_yaml == '': + mutated_yaml = w007_rule["detail"]["mutated_yaml"] + if mutated_yaml == "": break temp_data = index_data[each] - if w007_rule['file'][0] not in temp_data: - target_file_path = os.path.join(args.target_name, temp_data, w007_rule['file'][0]) - if temp_file_path != '' and target_file_path != temp_file_path: + if w007_rule["file"][0] not in temp_data: + target_file_path = os.path.join(args.target_name, temp_data, w007_rule["file"][0]) + if temp_file_path != "" and target_file_path != temp_file_path: update_the_yaml_target(target_file_path, line_number_list, mutated_yaml_list) line_number_list = [] mutated_yaml_list = [] @@ -259,17 +257,17 @@ def run(self): temp_file_path = target_file_path else: target_file_path = os.path.join(args.target_name, temp_data) - if temp_file_path != '' and target_file_path != temp_file_path: + if temp_file_path != "" and target_file_path != temp_file_path: update_the_yaml_target(target_file_path, line_number_list, mutated_yaml_list) line_number_list = [] mutated_yaml_list = [] mutated_yaml_list.append(mutated_yaml) temp_file_path = target_file_path - line_number = w007_rule['file'][1] + line_number = w007_rule["file"][1] line_number_list.append(line_number) break # w007 rule with mutated yaml is processed, breaking out of iteration try: - if target_file_path == '' or not mutated_yaml_list or not line_number_list: + if target_file_path == "" or not mutated_yaml_list or not line_number_list: continue update_the_yaml_target(target_file_path, line_number_list, mutated_yaml_list) except Exception as ex: @@ -280,6 +278,8 @@ def run(self): root_install = not args.skip_install if not silent and not pretty: print("Start scanning") + _yml_list = get_yml_list(target_name) + yaml_label_list = [(x["filepath"], x["label"], x["role_info"]) for x in _yml_list] c.evaluate( type=args.target_type, name=target_name, @@ -294,6 +294,7 @@ def run(self): include_test_contents=args.include_tests, load_all_taskfiles=load_all_taskfiles, save_only_rule_result=save_only_rule_result, + yaml_label_list=yaml_label_list, objects=args.objects, out_dir=args.out_dir, ) diff --git a/ansible_risk_insight/finder.py b/ansible_risk_insight/finder.py index 550a4ed2..6fe03bec 100644 --- a/ansible_risk_insight/finder.py +++ b/ansible_risk_insight/finder.py @@ -37,6 +37,7 @@ fqcn_module_name_re = re.compile(r"^[a-z0-9_]+\.[a-z0-9_]+\.[a-z0-9_]+$") module_name_re = re.compile(r"^[a-z0-9_.]+$") +taskfile_path_in_role_re = r".*roles/[^/]+/(tasks|handlers)/.*\.ya?ml" module_dir_patterns = [ "library", @@ -417,7 +418,7 @@ def find_module_dirs(role_root_dir): return module_dirs -def search_taskfiles_for_playbooks(path, taskfile_dir_paths=[]): +def search_taskfiles_for_playbooks(path, taskfile_dir_paths: list = []): # must copy the input here; otherwise, the added items are kept forever search_targets = [p for p in taskfile_dir_paths] for playbook_taskfile_dir_pattern in playbook_taskfile_dir_patterns: @@ -428,7 +429,7 @@ def search_taskfiles_for_playbooks(path, taskfile_dir_paths=[]): found = safe_glob(patterns, recursive=True) for f in found: # taskfiles in role will be loaded when the role is loaded, so skip - if "/roles/" in f: + if re.match(taskfile_path_in_role_re, f): continue # if it is a playbook, skip it if could_be_playbook(f): @@ -701,6 +702,12 @@ def get_role_info_from_path(fpath: str): _path = relative_path.rsplit(_target, 1)[0] role_name = _path.split("/")[-1] + + # if the path is something like "xxxx/roles/tasks" + # it is not an actual role, so skip it + if role_name == p.strip("/"): + continue + role_path = os.path.join(parent_dir, _path) found = True break @@ -951,8 +958,8 @@ def check_and_add_diff_lines(start_line, stop_line, lines, data_copy): def check_diff_and_copy_olddata_to_newdata(line_number_list, lines, new_data): """ - Function to find the old lines which weren't mutated by ARI rules, - it need to be copied to new content as is + Function to find the old lines which weren't mutated by ARI rules, + it need to be copied to new content as is """ if line_number_list and isinstance(line_number_list, list): new_content_last_set = line_number_list[-1] @@ -965,12 +972,12 @@ def check_diff_and_copy_olddata_to_newdata(line_number_list, lines, new_data): def update_and_append_new_line(new_line, old_line, leading_spaces, data_copy): """ - Function to get the leading space for the new ARI mutated line, with - its equivaltent old line with space similar to the old line + Function to get the leading space for the new ARI mutated line, with + its equivaltent old line with space similar to the old line """ line_with_adjusted_space = update_line_with_space(new_line, old_line, leading_spaces) data_copy.append(line_with_adjusted_space) - return '' + return "" def update_the_yaml_target(file_path, line_number_list, new_content_list): @@ -1005,7 +1012,7 @@ def update_the_yaml_target(file_path, line_number_list, new_content_list): temp_content = [] start = start_line_number - 1 end = stop_line_number - 1 - data_copy.append('\n') + data_copy.append("\n") for i in range(start, end): line_number = i if len(lines) == i: @@ -1027,15 +1034,15 @@ def update_the_yaml_target(file_path, line_number_list, new_content_list): lines[line_number] = " " * leading_spaces + new_line_content data_copy.append(lines[line_number]) else: - new_line_key = new_line_content.split(':') - new_key = new_line_key[0].strip(' ') + new_line_key = new_line_content.split(":") + new_key = new_line_key[0].strip(" ") for k in range(start, end): if k < len(lines): old_line_key = lines[k].split(":") if "---" in old_line_key[0]: continue - old_key = old_line_key[0].strip(' ') - if '-' in old_line_key[0] and ':' not in lines[k] and '-' in new_key: + old_key = old_line_key[0].strip(" ") + if "-" in old_line_key[0] and ":" not in lines[k] and "-" in new_key: # diff_in_lines = len(lines) - len(new_lines) leading_spaces = len(lines[k]) - len(lines[k].lstrip()) if diff_in_lines > len(lines): @@ -1052,10 +1059,10 @@ def update_the_yaml_target(file_path, line_number_list, new_content_list): elif old_key == new_key: new_line_content = update_and_append_new_line(new_line_content, lines[k], 0, data_copy) break - elif old_key.rstrip('\n') == new_key: + elif old_key.rstrip("\n") == new_key: new_line_content = update_and_append_new_line(new_line_content, lines[k], 0, data_copy) break - elif old_key.rstrip('\n') in new_key.split('.'): + elif old_key.rstrip("\n") in new_key.split("."): new_line_content = update_and_append_new_line(new_line_content, lines[k], 0, data_copy) break if new_line_content: # if there wasn't a match with old line, so this seems updated by ARI and added to w/o any change diff --git a/ansible_risk_insight/model_loader.py b/ansible_risk_insight/model_loader.py index a5bac534..57b5a486 100644 --- a/ansible_risk_insight/model_loader.py +++ b/ansible_risk_insight/model_loader.py @@ -394,7 +394,7 @@ def load_files(path, basedir="", yaml_label_list=None, role_name="", collection_ return [] files = [] - for (fpath, label, role_info) in yaml_label_list: + for fpath, label, role_info in yaml_label_list: if not fpath: continue if not label: @@ -486,7 +486,7 @@ def load_play( if task_blocks is None: continue last_task_line_num = -1 - for (task_dict, task_jsonpath) in task_blocks: + for task_dict, task_jsonpath in task_blocks: task_loading["total"] += 1 i = task_count error = None @@ -533,7 +533,7 @@ def load_play( if task_blocks is None: continue last_task_line_num = -1 - for (task_dict, task_jsonpath) in task_blocks: + for task_dict, task_jsonpath in task_blocks: i = task_count task_loading["total"] += 1 error = None @@ -580,7 +580,7 @@ def load_play( if task_blocks is None: continue last_task_line_num = -1 - for (task_dict, task_jsonpath) in task_blocks: + for task_dict, task_jsonpath in task_blocks: i = task_count task_loading["total"] += 1 error = None @@ -627,7 +627,7 @@ def load_play( if task_blocks is None: continue last_task_line_num = -1 - for (task_dict, task_jsonpath) in task_blocks: + for task_dict, task_jsonpath in task_blocks: i = task_count task_loading["total"] += 1 error = None @@ -872,10 +872,11 @@ def load_playbooks( patterns.append(os.path.join(path, "tests/**/*.ya?ml")) patterns.append(os.path.join(path, "molecule/**/*.ya?ml")) candidates = safe_glob(patterns, recursive=True) + candidates = [(c, False) for c in candidates] # add files if yaml_label_list is given if yaml_label_list: - for (fpath, label, role_info) in yaml_label_list: + for fpath, label, role_info in yaml_label_list: if label == "playbook": # if it is a playbook in role, it should be scanned by load_role if role_info: @@ -885,17 +886,21 @@ def load_playbooks( if not _fpath.startswith(basedir): _fpath = os.path.join(basedir, fpath) if _fpath not in candidates: - candidates.append(_fpath) + candidates.append((_fpath, True)) playbooks = [] playbook_names = [] - candidates = sorted(list(set(candidates))) - for fpath in candidates: + candidates = sorted(candidates, key=lambda x: x[0]) + loaded = set() + for fpath, from_list in candidates: + if fpath in loaded: + continue + if could_be_playbook(fpath=fpath) and could_be_playbook_detail(fpath=fpath): relative_path = "" if fpath.startswith(path): relative_path = fpath[len(path) :] - if "/roles/" in relative_path: + if "/roles/" in relative_path and not from_list: continue p = None try: @@ -920,6 +925,7 @@ def load_playbooks( else: playbooks.append(p.defined_in) playbook_names.append(p.defined_in) + loaded.add(fpath) if not load_children: playbooks = sorted(playbooks) return playbooks @@ -1192,10 +1198,33 @@ def load_roles( if found_roles: roles_dir_path = found_roles[0] + def is_role_dir(found_dirs: list) -> bool: + # From ansible role doc + # if none of the following dirs are found, we don't treat it as a role dir + role_dir_patterns = set( + [ + "tasks", + "handlers", + "templates", + "files", + "vars", + "defaults", + "meta", + "library", + "module_utils", + "lookup_plugins", + ] + ) + return len(role_dir_patterns.intersection(set(found_dirs))) > 0 + role_dirs = [] if roles_dir_path: - dirs = os.listdir(roles_dir_path) - role_dirs = [os.path.join(roles_dir_path, dir_name) for dir_name in dirs] + dirs = sorted(os.listdir(roles_dir_path)) + for dir_name in dirs: + candidate = os.path.join(roles_dir_path, dir_name) + dirs_in_cand = os.listdir(candidate) + if is_role_dir(dirs_in_cand): + role_dirs.append(candidate) if include_test_contents: test_targets_dir = os.path.join(path, "tests/integration/targets") @@ -1215,7 +1244,7 @@ def load_roles( # add role dirs if yaml_label_list is given if yaml_label_list: - for (fpath, label, role_info) in yaml_label_list: + for fpath, label, role_info in yaml_label_list: if role_info and isinstance(role_info, dict): role_path = role_info.get("path", "") _role_path = role_path @@ -1734,12 +1763,10 @@ def load_taskfiles(path, basedir="", yaml_label_list=None, load_children=True): return [] taskfile_paths = search_taskfiles_for_playbooks(path) - if len(taskfile_paths) == 0: - return [] # add files if yaml_label_list is given if yaml_label_list: - for (fpath, label, role_info) in yaml_label_list: + for fpath, label, role_info in yaml_label_list: if label == "taskfile": # if it is a taskfile in role, it should be scanned by load_role if role_info: @@ -1751,6 +1778,9 @@ def load_taskfiles(path, basedir="", yaml_label_list=None, load_children=True): if _fpath not in taskfile_paths: taskfile_paths.append(_fpath) + if len(taskfile_paths) == 0: + return [] + taskfiles = [] for taskfile_path in taskfile_paths: try: diff --git a/ansible_risk_insight/parser.py b/ansible_risk_insight/parser.py index 8ae67070..92a321aa 100644 --- a/ansible_risk_insight/parser.py +++ b/ansible_risk_insight/parser.py @@ -360,7 +360,7 @@ def run(self, load_data=None, load_json_path="", collection_name_of_project=""): try: label = "others" if ld.yaml_label_list: - for (_fpath, _label, _) in ld.yaml_label_list: + for _fpath, _label, _ in ld.yaml_label_list: if _fpath == file_path: label = _label f = load_file( diff --git a/ansible_risk_insight/ram_generator.py b/ansible_risk_insight/ram_generator.py index a47802c5..00709965 100644 --- a/ansible_risk_insight/ram_generator.py +++ b/ansible_risk_insight/ram_generator.py @@ -95,7 +95,7 @@ def run(self): if self._parallel: joblib.Parallel(n_jobs=-1)(joblib.delayed(self.scan)(i, num, _type, _name) for (i, num, _type, _name) in input_list) else: - for (i, num, _type, _name) in input_list: + for i, num, _type, _name in input_list: self.scan(i, num, _type, _name) def scan(self, i, num, type, name): diff --git a/ansible_risk_insight/rules/sample_rule.py b/ansible_risk_insight/rules/sample_rule.py index a52b4010..aa1756a4 100644 --- a/ansible_risk_insight/rules/sample_rule.py +++ b/ansible_risk_insight/rules/sample_rule.py @@ -33,7 +33,7 @@ class SampleRule(Rule): name: str = "EchoTaskContent" version: str = "v0.0.1" severity: Severity = Severity.NONE - tags: tuple = ("sample") + tags: tuple = "sample" def match(self, ctx: AnsibleRunContext) -> bool: # specify targets to be checked diff --git a/ansible_risk_insight/tree.py b/ansible_risk_insight/tree.py index 0e47d2f6..fedb82cd 100644 --- a/ansible_risk_insight/tree.py +++ b/ansible_risk_insight/tree.py @@ -116,7 +116,7 @@ def recursive_tree_load(self, node_key, src_dst_array, parent_keys=None): return n, parent_keys parent_keys.add(node_key) new_parent_keys = parent_keys.copy() - for (src_key, dst_key) in src_dst_array: + for src_key, dst_key in src_dst_array: children_keys = [] if node_key == src_key: children_keys.append(dst_key) diff --git a/ansible_risk_insight/yaml_utils.py b/ansible_risk_insight/yaml_utils.py index 9eceaa16..e9ecc4dc 100644 --- a/ansible_risk_insight/yaml_utils.py +++ b/ansible_risk_insight/yaml_utils.py @@ -304,9 +304,7 @@ def load(self, stream: Path | StreamTextType) -> Any: data = self.load_all(stream=text) except ParserError as ex: data = None - logger.error( # noqa: TRY400 - "Invalid yaml, verify the file contents and try again. %s", ex - ) + logger.error("Invalid yaml, verify the file contents and try again. %s", ex) # noqa: TRY400 except Exception as ex: print(ex) if preamble_comment is not None and isinstance( @@ -431,7 +429,5 @@ def _post_process_yaml(text: str, *, strip_version_directive: bool = False) -> s # got an empty list item. drop any trailing spaces. lines[i] = line.rstrip() + "\n" - text = "".join( - FormattedEmitter.drop_octothorpe_protection(line) for line in lines - ) + text = "".join(FormattedEmitter.drop_octothorpe_protection(line) for line in lines) return text