From a4d6ac903aadcb474c936f5dc57c06922388469f Mon Sep 17 00:00:00 2001 From: Sumit Jaiswal Date: Wed, 5 Jun 2024 11:48:00 +0530 Subject: [PATCH 1/9] add fix functionality Signed-off-by: Sumit Jaiswal --- .vscode/launch.json | 23 ++++++++ ansible_risk_insight/cli/__init__.py | 30 +++++++++- ansible_risk_insight/finder.py | 82 ++++++++++++++++++++++++++++ 3 files changed, 133 insertions(+), 2 deletions(-) create mode 100644 .vscode/launch.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 00000000..22d3ffb1 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,23 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Remote Attach", + "type": "debugpy", + "request": "attach", + "connect": { + "host": "localhost:5678", + "port": 5678 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}", + "remoteRoot": "." + } + ] + } + ] +} \ No newline at end of file diff --git a/ansible_risk_insight/cli/__init__.py b/ansible_risk_insight/cli/__init__.py index a2963804..b09f5bed 100644 --- a/ansible_risk_insight/cli/__init__.py +++ b/ansible_risk_insight/cli/__init__.py @@ -16,6 +16,7 @@ import os import json +import logging import argparse from ..scanner import ARIScanner, config @@ -26,8 +27,11 @@ get_role_metadata, split_name_and_version, ) -from ..finder import list_scan_target +from ..finder import list_scan_target, update_the_yaml_target +logging.basicConfig( + level=os.environ.get('LOGLEVEL', 'WARNING').upper() +) class ARICLI: args = None @@ -70,6 +74,7 @@ def __init__(self): action="store_true", help="if true, do scanning per playbook, role or taskfile (this reduces memory usage while scanning)", ) + parser.add_argument("--fix", action="store_true", help="if true, fix the scanned playbook after performing the inpline replace with ARI suggestions") parser.add_argument( "--task-num-threshold", default="100", @@ -85,6 +90,7 @@ def __init__(self): def run(self): args = self.args + print("ARI args: ", args.target_name) target_name = args.target_name target_version = "" if args.target_type in ["collection", "role"]: @@ -214,9 +220,29 @@ def run(self): for i, fpath in enumerate(list_per_type): index_data[i] = fpath list_file_path = os.path.join(args.out_dir, f"{scan_type}s", "index.json") + logging.info("list_file_path: ", list_file_path) with open(list_file_path, "w") as file: json.dump(index_data, file) - + if args.fix: + for each in index_data.keys(): + ari_suggestion_file_path = os.path.join(args.out_dir, f"{scan_type}s", str(each), "rule_result.json") + logging.info("ARI suggestion file path: %s", ari_suggestion_file_path) + with open(ari_suggestion_file_path) as f: + ari_suggestion_data = json.load(f) + targets = ari_suggestion_data['targets'] + for i in reversed(range(len(targets)-1)): + nodes = targets[i]['nodes'] + for j in reversed(range(len(nodes))): + node_rules = nodes[j]['rules'] + for k in reversed(range(len(node_rules))): + w007_rule = node_rules[k] + if (w007_rule['rule']['rule_id']).lower() == 'w007': + if not w007_rule.get('verdict') and w007_rule: + break + mutated_yaml = w007_rule['detail']['mutated_yaml'] + target_file_path = os.path.join(args.target_name, index_data[each], w007_rule['file'][0]) + line_number = w007_rule['file'][1] + update_the_yaml_target(target_file_path, line_number, mutated_yaml) else: if not silent and not pretty: print("Start preparing dependencies") diff --git a/ansible_risk_insight/finder.py b/ansible_risk_insight/finder.py index 4bdd1653..2f5abf71 100644 --- a/ansible_risk_insight/finder.py +++ b/ansible_risk_insight/finder.py @@ -19,8 +19,15 @@ import re import os import json +import logging import yaml import traceback +from ansible_risk_insight.yaml_utils import FormattedYAML +from ruamel.yaml.comments import CommentedMap, CommentedSeq + +logging.basicConfig( + level=os.environ.get('LOGLEVEL', 'WARNING').upper() +) try: # if `libyaml` is available, use C based loader for performance @@ -731,3 +738,78 @@ def list_scan_target(root_dir: str, task_num_threshold: int = -1): all_targets = sorted(all_targets, key=lambda x: x["filepath"]) all_targets = sorted(all_targets, key=lambda x: x["scan_type"]) return all_targets + + +def check_and_replace(new_data, old_data, replaced=False): + if new_data == old_data: + logging.info("Current file data and ARI mutated data are same!") + return True + if new_data['name'] == old_data['name']: + # each_task = new_parsed_data + replaced = True + return new_data, replaced + + +def update_the_yaml_target(file_path, line_number, new_content): + input_line_number = line_number.lstrip("L").split("-") + logging.info("Target file path: %s", file_path) + logging.info("Target line number: %s", input_line_number) + logging.info("Target new content %s", new_content) + + # Read the original YAML file + with open(file_path, 'r') as file: + data = file.read() + + yaml = FormattedYAML( + # Ansible only uses YAML 1.1, but others files should use newer 1.2 (ruamel.yaml defaults to 1.2) + ) + # Parse the YAML content with preserved formatting + parsed_data = yaml.load(data) + if not isinstance(parsed_data, CommentedMap | CommentedSeq): + # This is an empty vars file or similar which loads as None. + # It is not safe to write this file or data-loss is likely. + # Only maps and sequences can preserve comments. Skip it. + print( + "Ignored reformatting %s because current implementation in ruamel.yaml would drop comments. See https://sourceforge.net/p/ruamel-yaml/tickets/460/", + file, + ) + new_parsed_data = yaml.load(new_content) + if new_parsed_data == parsed_data: + logging.info("Current data and ARI mutated data are same!") + return + if not new_parsed_data: + return + new_parsed_data = new_parsed_data[0] + # variable to keep a check if there's a change in mutated and existing data + no_change = False + # parsed_data = parsed_data[0] + try: + if isinstance(parsed_data, list): + if parsed_data[0].get('tasks'): + tasks = [each_task for each_task in parsed_data[0]['tasks']] + for i in reversed(range(len(tasks))): + each_task = tasks[i] + output = check_and_replace(new_parsed_data, each_task) + if output: + if isinstance(output, tuple): + parsed_data[0]['tasks'][i] = output[0] + break + no_change = True + break + else: + for i in reversed(range(len(parsed_data))): + output = check_and_replace(new_parsed_data, parsed_data[i]) + if output: + if isinstance(output, tuple) and len(output) > 1: + parsed_data[i] = output[0] + break + no_change = True + break + + if not no_change: + with open(file_path, 'w') as file: + yaml.dump(parsed_data, file) + except Exception as ex: + print("ARI fix functionality failed with: %s", ex) + logging.warning("ARI fix functionality failed with: %s", ex) + return From c983f3412630d3401efa7fb219126d0a25c291a8 Mon Sep 17 00:00:00 2001 From: Sumit Jaiswal Date: Wed, 5 Jun 2024 12:06:06 +0530 Subject: [PATCH 2/9] fix ci Signed-off-by: Sumit Jaiswal --- ansible_risk_insight/cli/__init__.py | 11 +- ansible_risk_insight/finder.py | 11 +- ansible_risk_insight/yaml_utils.py | 439 +++++++++++++++++++++++++++ 3 files changed, 453 insertions(+), 8 deletions(-) create mode 100644 ansible_risk_insight/yaml_utils.py diff --git a/ansible_risk_insight/cli/__init__.py b/ansible_risk_insight/cli/__init__.py index b09f5bed..0489043f 100644 --- a/ansible_risk_insight/cli/__init__.py +++ b/ansible_risk_insight/cli/__init__.py @@ -33,6 +33,7 @@ level=os.environ.get('LOGLEVEL', 'WARNING').upper() ) + class ARICLI: args = None @@ -74,7 +75,11 @@ def __init__(self): action="store_true", help="if true, do scanning per playbook, role or taskfile (this reduces memory usage while scanning)", ) - parser.add_argument("--fix", action="store_true", help="if true, fix the scanned playbook after performing the inpline replace with ARI suggestions") + parser.add_argument( + "--fix", + action="store_true", + help="if true, fix the scanned playbook after performing the inpline replace with ARI suggestions" + ) parser.add_argument( "--task-num-threshold", default="100", @@ -230,7 +235,7 @@ def run(self): with open(ari_suggestion_file_path) as f: ari_suggestion_data = json.load(f) targets = ari_suggestion_data['targets'] - for i in reversed(range(len(targets)-1)): + for i in reversed(range(len(targets) - 1)): nodes = targets[i]['nodes'] for j in reversed(range(len(nodes))): node_rules = nodes[j]['rules'] @@ -241,7 +246,7 @@ def run(self): break mutated_yaml = w007_rule['detail']['mutated_yaml'] target_file_path = os.path.join(args.target_name, index_data[each], w007_rule['file'][0]) - line_number = w007_rule['file'][1] + line_number = w007_rule['file'][1] update_the_yaml_target(target_file_path, line_number, mutated_yaml) else: if not silent and not pretty: diff --git a/ansible_risk_insight/finder.py b/ansible_risk_insight/finder.py index 2f5abf71..97483eb8 100644 --- a/ansible_risk_insight/finder.py +++ b/ansible_risk_insight/finder.py @@ -25,10 +25,6 @@ from ansible_risk_insight.yaml_utils import FormattedYAML from ruamel.yaml.comments import CommentedMap, CommentedSeq -logging.basicConfig( - level=os.environ.get('LOGLEVEL', 'WARNING').upper() -) - try: # if `libyaml` is available, use C based loader for performance import _yaml # noqa: F401 @@ -40,6 +36,10 @@ from .safe_glob import safe_glob from .awx_utils import could_be_playbook, search_playbooks +logging.basicConfig( + level=os.environ.get('LOGLEVEL', 'WARNING').upper() +) + fqcn_module_name_re = re.compile(r"^[a-z0-9_]+\.[a-z0-9_]+\.[a-z0-9_]+$") module_name_re = re.compile(r"^[a-z0-9_.]+$") @@ -770,7 +770,8 @@ def update_the_yaml_target(file_path, line_number, new_content): # It is not safe to write this file or data-loss is likely. # Only maps and sequences can preserve comments. Skip it. print( - "Ignored reformatting %s because current implementation in ruamel.yaml would drop comments. See https://sourceforge.net/p/ruamel-yaml/tickets/460/", + "Ignored reformatting %s because current implementation in ruamel.yaml would drop comments." + + " See https://sourceforge.net/p/ruamel-yaml/tickets/460/", file, ) new_parsed_data = yaml.load(new_content) diff --git a/ansible_risk_insight/yaml_utils.py b/ansible_risk_insight/yaml_utils.py new file mode 100644 index 00000000..048cfdf7 --- /dev/null +++ b/ansible_risk_insight/yaml_utils.py @@ -0,0 +1,439 @@ +"""Utility helpers to simplify working with yaml-based data.""" + +# pylint: disable=too-many-lines +from __future__ import annotations + +import logging +from io import StringIO +from pathlib import Path +from typing import TYPE_CHECKING, Any, cast +import re +from ruamel.yaml.comments import CommentedMap, CommentedSeq +from ruamel.yaml.composer import ComposerError +from ruamel.yaml.constructor import RoundTripConstructor +from ruamel.yaml.emitter import Emitter + +# Module 'ruamel.yaml' does not explicitly export attribute 'YAML'; implicit reexport disabled +# To make the type checkers happy, we import from ruamel.yaml.main instead. +from ruamel.yaml.main import YAML +from ruamel.yaml.parser import ParserError +from ruamel.yaml.scalarint import HexInt, ScalarInt + + +if TYPE_CHECKING: + # noinspection PyProtectedMember + from ruamel.yaml.compat import StreamTextType + from ruamel.yaml.nodes import ScalarNode + from ruamel.yaml.representer import RoundTripRepresenter + +_logger = logging.getLogger(__name__) + + +class OctalIntYAML11(ScalarInt): + """OctalInt representation for YAML 1.1.""" + + # tell mypy that ScalarInt has these attributes + _width: Any + _underscore: Any + + def __new__(cls, *args: Any, **kwargs: Any) -> Any: + """Create a new int with ScalarInt-defined attributes.""" + return ScalarInt.__new__(cls, *args, **kwargs) + + @staticmethod + def represent_octal(representer: RoundTripRepresenter, data: OctalIntYAML11) -> Any: + """Return a YAML 1.1 octal representation. + + Based on ruamel.yaml.representer.RoundTripRepresenter.represent_octal_int() + (which only handles the YAML 1.2 octal representation). + """ + v = format(data, "o") + anchor = data.yaml_anchor(any=True) + # noinspection PyProtectedMember + return representer.insert_underscore( + "0", + v, + data._underscore, # noqa: SLF001 + anchor=anchor, + ) + + +class CustomConstructor(RoundTripConstructor): + """Custom YAML constructor that preserves Octal formatting in YAML 1.1.""" + + def construct_yaml_int(self, node: ScalarNode) -> Any: + """Construct int while preserving Octal formatting in YAML 1.1. + + ruamel.yaml only preserves the octal format for YAML 1.2. + For 1.1, it converts the octal to an int. So, we preserve the format. + + Code partially copied from ruamel.yaml (MIT licensed). + """ + ret = super().construct_yaml_int(node) + if self.resolver.processing_version == (1, 1) and isinstance(ret, int): + # Do not rewrite zero as octal. + if ret == 0: + return ret + # see if we've got an octal we need to preserve. + value_su = self.construct_scalar(node) + try: + v = value_su.rstrip("_") + underscore = [len(v) - v.rindex("_") - 1, False, False] # type: Any + except ValueError: + underscore = None + except IndexError: + underscore = None + value_s = value_su.replace("_", "") + if value_s[0] in "+-": + value_s = value_s[1:] + if value_s[0:2] == "0x": + ret = HexInt(ret, width=len(value_s) - 2) + elif value_s[0] == "0": + # got an octal in YAML 1.1 + ret = OctalIntYAML11( + ret, + width=None, + underscore=underscore, + anchor=node.anchor, + ) + return ret + + +CustomConstructor.add_constructor( + "tag:yaml.org,2002:int", + CustomConstructor.construct_yaml_int, +) + + +class FormattedEmitter(Emitter): + """Emitter that applies custom formatting rules when dumping YAML. + + Differences from ruamel.yaml defaults: + + - indentation of root-level sequences + - prefer double-quoted scalars over single-quoted scalars + + This ensures that root-level sequences are never indented. + All subsequent levels are indented as configured (normal ruamel.yaml behavior). + + Earlier implementations used dedent on ruamel.yaml's dumped output, + but string magic like that had a ton of problematic edge cases. + """ + + preferred_quote = '"' # either " or ' + + min_spaces_inside = 0 + max_spaces_inside = 1 + + _sequence_indent = 2 + _sequence_dash_offset = 0 # Should be _sequence_indent - 2 + _root_is_sequence = False + + _in_empty_flow_map = False + + +# pylint: disable=too-many-instance-attributes +class FormattedYAML(YAML): + """A YAML loader/dumper that handles ansible content better by default.""" + + def __init__( # pylint: disable=too-many-arguments + self, + *, + typ: str | None = None, + pure: bool = False, + output: Any = None, + plug_ins: list[str] | None = None, + version: tuple[int, int] | None = None, + ): + """Return a configured ``ruamel.yaml.YAML`` instance. + + Some config defaults get extracted from the yamllint config. + + ``ruamel.yaml.YAML`` uses attributes to configure how it dumps yaml files. + Some of these settings can be confusing, so here are examples of how different + settings will affect the dumped yaml. + + This example does not indent any sequences: + + .. code:: python + + yaml.explicit_start=True + yaml.map_indent=2 + yaml.sequence_indent=2 + yaml.sequence_dash_offset=0 + + .. code:: yaml + + --- + - name: A playbook + tasks: + - name: Task + + This example indents all sequences including the root-level: + + .. code:: python + + yaml.explicit_start=True + yaml.map_indent=2 + yaml.sequence_indent=4 + yaml.sequence_dash_offset=2 + # yaml.Emitter defaults to ruamel.yaml.emitter.Emitter + + .. code:: yaml + + --- + - name: Playbook + tasks: + - name: Task + + This example indents all sequences except at the root-level: + + .. code:: python + + yaml.explicit_start=True + yaml.map_indent=2 + yaml.sequence_indent=4 + yaml.sequence_dash_offset=2 + yaml.Emitter = FormattedEmitter # custom Emitter prevents root-level indents + + .. code:: yaml + + --- + - name: Playbook + tasks: + - name: Task + """ + if version: + if isinstance(version, str): + x, y = version.split(".", maxsplit=1) + version = (int(x), int(y)) + self._yaml_version_default: tuple[int, int] = version + self._yaml_version: tuple[int, int] = self._yaml_version_default + super().__init__(typ=typ, pure=pure, output=output, plug_ins=plug_ins) + + # NB: We ignore some mypy issues because ruamel.yaml typehints are not great. + + # config = self._defaults_from_yamllint_config() + + # # these settings are derived from yamllint config + # self.explicit_start: bool = config["explicit_start"] # type: ignore[assignment] + # self.explicit_end: bool = config["explicit_end"] # type: ignore[assignment] + # self.width: int = config["width"] # type: ignore[assignment] + # indent_sequences: bool = cast(bool, config["indent_sequences"]) + # preferred_quote: str = cast(str, config["preferred_quote"]) # either ' or " + + # min_spaces_inside: int = cast(int, config["min_spaces_inside"]) + # max_spaces_inside: int = cast(int, config["max_spaces_inside"]) + + self.default_flow_style = False + self.compact_seq_seq = True # type: ignore[assignment] # dash after dash + self.compact_seq_map = True # type: ignore[assignment] # key after dash + + # Do not use yaml.indent() as it obscures the purpose of these vars: + self.map_indent = 2 + self.sequence_indent = 2 + self.sequence_dash_offset = self.sequence_indent - 2 + + # If someone doesn't want our FormattedEmitter, they can change it. + self.Emitter = FormattedEmitter + + # ignore invalid preferred_quote setting + + FormattedEmitter.preferred_quote = '"' + # NB: default_style affects preferred_quote as well. + # self.default_style ∈ None (default), '', '"', "'", '|', '>' + + # spaces inside braces for flow mappings + FormattedEmitter.min_spaces_inside = 0 + FormattedEmitter.max_spaces_inside = 1 + + # We need a custom constructor to preserve Octal formatting in YAML 1.1 + self.Constructor = CustomConstructor + self.Representer.add_representer(OctalIntYAML11, OctalIntYAML11.represent_octal) + + # We should preserve_quotes loads all strings as a str subclass that carries + # a quote attribute. Will the str subclasses cause problems in transforms? + # Are there any other gotchas to this? + # + # This will only preserve quotes for strings read from the file. + # anything modified by the transform will use no quotes, preferred_quote, + # or the quote that results in the least amount of escaping. + + # If needed, we can use this to change null representation to be explicit + # (see https://stackoverflow.com/a/44314840/1134951) + # self.Representer.add_representer( + + + @property + def version(self) -> tuple[int, int] | None: + """Return the YAML version used to parse or dump. + + Ansible uses PyYAML which only supports YAML 1.1. ruamel.yaml defaults to 1.2. + So, we have to make sure we dump yaml files using YAML 1.1. + We can relax the version requirement once ansible uses a version of PyYAML + that includes this PR: https://github.com/yaml/pyyaml/pull/555 + """ + if hasattr(self, "_yaml_version"): + return self._yaml_version + return None + + @version.setter + def version(self, value: tuple[int, int] | None) -> None: + """Ensure that yaml version uses our default value. + + The yaml Reader updates this value based on the ``%YAML`` directive in files. + So, if a file does not include the directive, it sets this to None. + But, None effectively resets the parsing version to YAML 1.2 (ruamel's default). + """ + if value is not None: + self._yaml_version = value + elif hasattr(self, "_yaml_version_default"): + self._yaml_version = self._yaml_version_default + # We do nothing if the object did not have a previous default version defined + + def load(self, stream: Path | StreamTextType) -> Any: + """Load YAML content from a string while avoiding known ruamel.yaml issues.""" + if not isinstance(stream, str): + msg = f"expected a str but got {type(stream)}" + raise NotImplementedError(msg) + # As ruamel drops comments for any document that is not a mapping or sequence, + # we need to avoid using it to reformat those documents. + # https://sourceforge.net/p/ruamel-yaml/tickets/460/ + + text, preamble_comment = self._pre_process_yaml(stream) + try: + data = super().load(stream=text) + except ComposerError: + data = self.load_all(stream=text) + except ParserError as ex: + data = None + _logger.error( # noqa: TRY400 + "Invalid yaml, verify the file contents and try again.", + ) + except Exception as ex: + print(ex) + if preamble_comment is not None and isinstance( + data, + CommentedMap | CommentedSeq, + ): + data.preamble_comment = preamble_comment # type: ignore[union-attr] + # Because data can validly also be None for empty documents, we cannot + # really annotate the return type here, so we need to remember to + # never save None or scalar data types when reformatting. + return data + + def dumps(self, data: Any) -> str: + """Dump YAML document to string (including its preamble_comment).""" + preamble_comment: str | None = getattr(data, "preamble_comment", None) + self._prevent_wrapping_flow_style(data) + with StringIO() as stream: + if preamble_comment: + stream.write(preamble_comment) + self.dump(data, stream) + text = stream.getvalue() + strip_version_directive = hasattr(self, "_yaml_version_default") + return self._post_process_yaml( + text, + strip_version_directive=strip_version_directive, + ) + + # ruamel.yaml only preserves empty (no whitespace) blank lines + # (ie "/n/n" becomes "/n/n" but "/n /n" becomes "/n"). + # So, we need to identify whitespace-only lines to drop spaces before reading. + _whitespace_only_lines_re = re.compile(r"^ +$", re.MULTILINE) + + def _pre_process_yaml(self, text: str) -> tuple[str, str | None]: + """Handle known issues with ruamel.yaml loading. + + Preserve blank lines despite extra whitespace. + Preserve any preamble (aka header) comments before "---". + + For more on preamble comments, see: https://stackoverflow.com/questions/70286108/python-ruamel-yaml-package-how-to-get-header-comment-lines/70287507#70287507 + """ + text = self._whitespace_only_lines_re.sub("", text) + + # I investigated extending ruamel.yaml to capture preamble comments. + # preamble comment goes from: + # DocumentStartToken.comment -> DocumentStartEvent.comment + # Then, in the composer: + # once in composer.current_event + # discards DocumentStartEvent + # move DocumentStartEvent to composer.last_event + # all document nodes get composed (events get used) + # discard DocumentEndEvent + # move DocumentEndEvent to composer.last_event + # So, there's no convenient way to extend the composer + # to somehow capture the comments and pass them on. + + preamble_comments = [] + if "\n---\n" not in text and "\n--- " not in text: + # nothing is before the document start mark, + # so there are no comments to preserve. + return text, None + for line in text.splitlines(True): + # We only need to capture the preamble comments. No need to remove them. + # lines might also include directives. + if line.lstrip().startswith("#") or line == "\n": + preamble_comments.append(line) + elif line.startswith("---"): + break + + return text, "".join(preamble_comments) or None + + @staticmethod + def _post_process_yaml(text: str, *, strip_version_directive: bool = False) -> str: + """Handle known issues with ruamel.yaml dumping. + + Make sure there's only one newline at the end of the file. + + Fix the indent of full-line comments to match the indent of the next line. + See: https://stackoverflow.com/questions/71354698/how-can-i-use-the-ruamel-yaml-rtsc-mode/71355688#71355688 + Also, removes "#" protection from strings that prevents them from being + identified as full line comments in post-processing. + + Make sure null list items don't end in a space. + """ + # remove YAML directive + if strip_version_directive and text.startswith("%YAML"): + text = text.split("\n", 1)[1] + + text = text.rstrip("\n") + "\n" + + lines = text.splitlines(keepends=True) + full_line_comments: list[tuple[int, str]] = [] + for i, line in enumerate(lines): + stripped = line.lstrip() + if not stripped: + # blank line. Move on. + continue + + space_length = len(line) - len(stripped) + + if stripped.startswith("#"): + # got a full line comment + + # allow some full line comments to match the previous indent + if i > 0 and not full_line_comments and space_length: + prev = lines[i - 1] + prev_space_length = len(prev) - len(prev.lstrip()) + if prev_space_length == space_length: + # if the indent matches the previous line's indent, skip it. + continue + + full_line_comments.append((i, stripped)) + elif full_line_comments: + # end of full line comments so adjust to match indent of this line + spaces = " " * space_length + for index, comment in full_line_comments: + lines[index] = spaces + comment + full_line_comments.clear() + + cleaned = line.strip() + if not cleaned.startswith("#") and cleaned.endswith("-"): + # got an empty list item. drop any trailing spaces. + lines[i] = line.rstrip() + "\n" + + text = "".join( + FormattedEmitter.drop_octothorpe_protection(line) for line in lines + ) + return text From 199631926f2c2ab64ed7006fd2beb70ba5b14857 Mon Sep 17 00:00:00 2001 From: Sumit Jaiswal Date: Wed, 5 Jun 2024 12:09:03 +0530 Subject: [PATCH 3/9] fix ci Signed-off-by: Sumit Jaiswal --- ansible_risk_insight/yaml_utils.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ansible_risk_insight/yaml_utils.py b/ansible_risk_insight/yaml_utils.py index 048cfdf7..c3a351b4 100644 --- a/ansible_risk_insight/yaml_utils.py +++ b/ansible_risk_insight/yaml_utils.py @@ -6,7 +6,7 @@ import logging from io import StringIO from pathlib import Path -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any import re from ruamel.yaml.comments import CommentedMap, CommentedSeq from ruamel.yaml.composer import ComposerError @@ -263,7 +263,6 @@ def __init__( # pylint: disable=too-many-arguments # (see https://stackoverflow.com/a/44314840/1134951) # self.Representer.add_representer( - @property def version(self) -> tuple[int, int] | None: """Return the YAML version used to parse or dump. @@ -308,7 +307,7 @@ def load(self, stream: Path | StreamTextType) -> Any: except ParserError as ex: data = None _logger.error( # noqa: TRY400 - "Invalid yaml, verify the file contents and try again.", + "Invalid yaml, verify the file contents and try again. %s", ex ) except Exception as ex: print(ex) @@ -336,7 +335,7 @@ def dumps(self, data: Any) -> str: text, strip_version_directive=strip_version_directive, ) - + # ruamel.yaml only preserves empty (no whitespace) blank lines # (ie "/n/n" becomes "/n/n" but "/n /n" becomes "/n"). # So, we need to identify whitespace-only lines to drop spaces before reading. @@ -348,7 +347,8 @@ def _pre_process_yaml(self, text: str) -> tuple[str, str | None]: Preserve blank lines despite extra whitespace. Preserve any preamble (aka header) comments before "---". - For more on preamble comments, see: https://stackoverflow.com/questions/70286108/python-ruamel-yaml-package-how-to-get-header-comment-lines/70287507#70287507 + For more on preamble comments, see: + https://stackoverflow.com/questions/70286108/python-ruamel-yaml-package-how-to-get-header-comment-lines/70287507#70287507 """ text = self._whitespace_only_lines_re.sub("", text) From 824a62f06fda1ac966add69901edf827f138bd08 Mon Sep 17 00:00:00 2001 From: Sumit Jaiswal Date: Thu, 6 Jun 2024 10:41:33 +0530 Subject: [PATCH 4/9] update fix code Signed-off-by: Sumit Jaiswal --- ansible_risk_insight/cli/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ansible_risk_insight/cli/__init__.py b/ansible_risk_insight/cli/__init__.py index 0489043f..c6c63e06 100644 --- a/ansible_risk_insight/cli/__init__.py +++ b/ansible_risk_insight/cli/__init__.py @@ -235,9 +235,10 @@ def run(self): with open(ari_suggestion_file_path) as f: ari_suggestion_data = json.load(f) targets = ari_suggestion_data['targets'] - for i in reversed(range(len(targets) - 1)): + for i in reversed(range(len(targets))): + logging.info("Nodes dir number: %s", i) nodes = targets[i]['nodes'] - for j in reversed(range(len(nodes))): + for j in reversed(range(1, len(nodes))): node_rules = nodes[j]['rules'] for k in reversed(range(len(node_rules))): w007_rule = node_rules[k] @@ -248,6 +249,7 @@ def run(self): target_file_path = os.path.join(args.target_name, index_data[each], w007_rule['file'][0]) line_number = w007_rule['file'][1] update_the_yaml_target(target_file_path, line_number, mutated_yaml) + break # w007 rule with mutated yaml is processed, breaking out of iteration else: if not silent and not pretty: print("Start preparing dependencies") From 79ccf54ce9c19ec17a7beda8b65f2efab12ee2ad Mon Sep 17 00:00:00 2001 From: Sumit Jaiswal Date: Thu, 6 Jun 2024 12:02:08 +0530 Subject: [PATCH 5/9] add error handling --- ansible_risk_insight/cli/__init__.py | 6 +++ ansible_risk_insight/finder.py | 59 ++++++++++++++-------------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/ansible_risk_insight/cli/__init__.py b/ansible_risk_insight/cli/__init__.py index c6c63e06..c57de898 100644 --- a/ansible_risk_insight/cli/__init__.py +++ b/ansible_risk_insight/cli/__init__.py @@ -246,6 +246,12 @@ def run(self): if not w007_rule.get('verdict') and w007_rule: break mutated_yaml = w007_rule['detail']['mutated_yaml'] + if mutated_yaml == '': + break + if w007_rule['file'][0] not in index_data[each]: + target_file_path = os.path.join(args.target_name, index_data[each], w007_rule['file'][0]) + else: + target_file_path = os.path.join(args.target_name, index_data[each]) target_file_path = os.path.join(args.target_name, index_data[each], w007_rule['file'][0]) line_number = w007_rule['file'][1] update_the_yaml_target(target_file_path, line_number, mutated_yaml) diff --git a/ansible_risk_insight/finder.py b/ansible_risk_insight/finder.py index 97483eb8..53ed75fb 100644 --- a/ansible_risk_insight/finder.py +++ b/ansible_risk_insight/finder.py @@ -755,36 +755,36 @@ def update_the_yaml_target(file_path, line_number, new_content): logging.info("Target file path: %s", file_path) logging.info("Target line number: %s", input_line_number) logging.info("Target new content %s", new_content) + try: + # Read the original YAML file + with open(file_path, 'r') as file: + data = file.read() - # Read the original YAML file - with open(file_path, 'r') as file: - data = file.read() - - yaml = FormattedYAML( - # Ansible only uses YAML 1.1, but others files should use newer 1.2 (ruamel.yaml defaults to 1.2) - ) - # Parse the YAML content with preserved formatting - parsed_data = yaml.load(data) - if not isinstance(parsed_data, CommentedMap | CommentedSeq): - # This is an empty vars file or similar which loads as None. - # It is not safe to write this file or data-loss is likely. - # Only maps and sequences can preserve comments. Skip it. - print( - "Ignored reformatting %s because current implementation in ruamel.yaml would drop comments." - + " See https://sourceforge.net/p/ruamel-yaml/tickets/460/", - file, + yaml = FormattedYAML( + # Ansible only uses YAML 1.1, but others files should use newer 1.2 (ruamel.yaml defaults to 1.2) ) - new_parsed_data = yaml.load(new_content) - if new_parsed_data == parsed_data: - logging.info("Current data and ARI mutated data are same!") - return - if not new_parsed_data: - return - new_parsed_data = new_parsed_data[0] - # variable to keep a check if there's a change in mutated and existing data - no_change = False - # parsed_data = parsed_data[0] - try: + # Parse the YAML content with preserved formatting + parsed_data = yaml.load(data) + if not isinstance(parsed_data, CommentedMap | CommentedSeq): + # This is an empty vars file or similar which loads as None. + # It is not safe to write this file or data-loss is likely. + # Only maps and sequences can preserve comments. Skip it. + print( + "Ignored reformatting %s because current implementation in ruamel.yaml would drop comments." + + " See https://sourceforge.net/p/ruamel-yaml/tickets/460/", + file, + ) + new_parsed_data = yaml.load(new_content) + if new_parsed_data == parsed_data: + logging.info("Current data and ARI mutated data are same!") + return + if not new_parsed_data: + return + new_parsed_data = new_parsed_data[0] + # variable to keep a check if there's a change in mutated and existing data + no_change = False + # parsed_data = parsed_data[0] + if isinstance(parsed_data, list): if parsed_data[0].get('tasks'): tasks = [each_task for each_task in parsed_data[0]['tasks']] @@ -811,6 +811,5 @@ def update_the_yaml_target(file_path, line_number, new_content): with open(file_path, 'w') as file: yaml.dump(parsed_data, file) except Exception as ex: - print("ARI fix functionality failed with: %s", ex) - logging.warning("ARI fix functionality failed with: %s", ex) + logging.warning("ARI yaml update fix functionality failed with: %s for file: %s", ex, file_path) return From 0a60894948e135583d84fbabfd9ccb50287d4d9a Mon Sep 17 00:00:00 2001 From: Sumit Jaiswal Date: Thu, 6 Jun 2024 12:18:34 +0530 Subject: [PATCH 6/9] add debug steps Signed-off-by: Sumit Jaiswal --- .vscode/launch.json | 14 +++----------- README.md | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 22d3ffb1..47a83312 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,7 +1,4 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "configurations": [ { @@ -9,15 +6,10 @@ "type": "debugpy", "request": "attach", "connect": { - "host": "localhost:5678", + "host": "localhost", "port": 5678 }, - "pathMappings": [ - { - "localRoot": "${workspaceFolder}", - "remoteRoot": "." - } - ] - } + "justMyCode": false + }, ] } \ No newline at end of file diff --git a/README.md b/README.md index ba8a0fcd..954bd365 100644 --- a/README.md +++ b/README.md @@ -99,3 +99,23 @@ git clone git@github.com:ansible/ansible-risk-insight.git cd ansible-risk-insight pip install -e . ``` + +## Debugging ARI over VSCode (for development) + +ARI can be debugged using VSCode. Steps to start debugging: + +Step 1: Please add below line to fine that needs to be debugged: +``` +import debugpy +debugpy.listen(5678) +debugpy.wait_for_client() +``` +Step 2: Fire the ARI command via cli command to run the ARI, ref as: +``` +(.env) ➜ ari project --out-dir /tmp/CS --save-only-rule-result --scan-per-target --task-num-threshold 100 --fix +0.00s - Debugger warning: It seems that frozen modules are being used, which may +0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off +0.00s - to python to disable frozen modules. +0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation. +``` +Step 3: From VSCode, click `Run->Start Debugging`, debugger should stop at the breakpoints placed inside the ARI code. From 6ff6c0b3f70e2f4a8f67bc3b10cf90d4e3824c9c Mon Sep 17 00:00:00 2001 From: Sumit Jaiswal Date: Thu, 6 Jun 2024 12:20:47 +0530 Subject: [PATCH 7/9] fix spelling Signed-off-by: Sumit Jaiswal --- .vscode/launch.json | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 47a83312..2173926d 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -12,4 +12,4 @@ "justMyCode": false }, ] -} \ No newline at end of file +} diff --git a/README.md b/README.md index 954bd365..a7ab6695 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ pip install -e . ARI can be debugged using VSCode. Steps to start debugging: -Step 1: Please add below line to fine that needs to be debugged: +Step 1: Please add below line to file that needs to be debugged: ``` import debugpy debugpy.listen(5678) From c81f22e77df1b33779e6020c0b830d42a0f7871a Mon Sep 17 00:00:00 2001 From: Sumit Jaiswal Date: Thu, 6 Jun 2024 12:45:25 +0530 Subject: [PATCH 8/9] update logger to ari logger Signed-off-by: Sumit Jaiswal --- README.md | 1 + ansible_risk_insight/cli/__init__.py | 12 ++++-------- ansible_risk_insight/finder.py | 19 ++++++------------- ansible_risk_insight/yaml_utils.py | 6 ++---- 4 files changed, 13 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index a7ab6695..e05140c4 100644 --- a/README.md +++ b/README.md @@ -118,4 +118,5 @@ Step 2: Fire the ARI command via cli command to run the ARI, ref as: 0.00s - to python to disable frozen modules. 0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation. ``` +Note: If you want to disable the validation warning, please set `PYDEVD_DISABLE_FILE_VALIDATION=1` under your enviornment. Step 3: From VSCode, click `Run->Start Debugging`, debugger should stop at the breakpoints placed inside the ARI code. diff --git a/ansible_risk_insight/cli/__init__.py b/ansible_risk_insight/cli/__init__.py index c57de898..34ad5a02 100644 --- a/ansible_risk_insight/cli/__init__.py +++ b/ansible_risk_insight/cli/__init__.py @@ -16,7 +16,6 @@ import os import json -import logging import argparse from ..scanner import ARIScanner, config @@ -28,10 +27,7 @@ split_name_and_version, ) from ..finder import list_scan_target, update_the_yaml_target - -logging.basicConfig( - level=os.environ.get('LOGLEVEL', 'WARNING').upper() -) +import ansible_risk_insight.logger as logger class ARICLI: @@ -225,18 +221,18 @@ def run(self): for i, fpath in enumerate(list_per_type): index_data[i] = fpath list_file_path = os.path.join(args.out_dir, f"{scan_type}s", "index.json") - logging.info("list_file_path: ", list_file_path) + logger.debug("list_file_path: ", list_file_path) with open(list_file_path, "w") as file: json.dump(index_data, file) if args.fix: for each in index_data.keys(): ari_suggestion_file_path = os.path.join(args.out_dir, f"{scan_type}s", str(each), "rule_result.json") - logging.info("ARI suggestion file path: %s", ari_suggestion_file_path) + logger.debug("ARI suggestion file path: %s", ari_suggestion_file_path) with open(ari_suggestion_file_path) as f: ari_suggestion_data = json.load(f) targets = ari_suggestion_data['targets'] for i in reversed(range(len(targets))): - logging.info("Nodes dir number: %s", i) + logger.debug("Nodes dir number: %s", i) nodes = targets[i]['nodes'] for j in reversed(range(1, len(nodes))): node_rules = nodes[j]['rules'] diff --git a/ansible_risk_insight/finder.py b/ansible_risk_insight/finder.py index 53ed75fb..19b13dd1 100644 --- a/ansible_risk_insight/finder.py +++ b/ansible_risk_insight/finder.py @@ -19,7 +19,6 @@ import re import os import json -import logging import yaml import traceback from ansible_risk_insight.yaml_utils import FormattedYAML @@ -36,10 +35,6 @@ from .safe_glob import safe_glob from .awx_utils import could_be_playbook, search_playbooks -logging.basicConfig( - level=os.environ.get('LOGLEVEL', 'WARNING').upper() -) - fqcn_module_name_re = re.compile(r"^[a-z0-9_]+\.[a-z0-9_]+\.[a-z0-9_]+$") module_name_re = re.compile(r"^[a-z0-9_.]+$") @@ -742,19 +737,18 @@ def list_scan_target(root_dir: str, task_num_threshold: int = -1): def check_and_replace(new_data, old_data, replaced=False): if new_data == old_data: - logging.info("Current file data and ARI mutated data are same!") + logger.info("Current file data and ARI mutated data are same!") return True if new_data['name'] == old_data['name']: - # each_task = new_parsed_data replaced = True return new_data, replaced def update_the_yaml_target(file_path, line_number, new_content): input_line_number = line_number.lstrip("L").split("-") - logging.info("Target file path: %s", file_path) - logging.info("Target line number: %s", input_line_number) - logging.info("Target new content %s", new_content) + logger.debug("Target file path: %s", file_path) + logger.debug("Target line number: %s", input_line_number) + logger.debug("Target new content %s", new_content) try: # Read the original YAML file with open(file_path, 'r') as file: @@ -776,14 +770,13 @@ def update_the_yaml_target(file_path, line_number, new_content): ) new_parsed_data = yaml.load(new_content) if new_parsed_data == parsed_data: - logging.info("Current data and ARI mutated data are same!") + logger.info("Current data and ARI mutated data are same!") return if not new_parsed_data: return new_parsed_data = new_parsed_data[0] # variable to keep a check if there's a change in mutated and existing data no_change = False - # parsed_data = parsed_data[0] if isinstance(parsed_data, list): if parsed_data[0].get('tasks'): @@ -811,5 +804,5 @@ def update_the_yaml_target(file_path, line_number, new_content): with open(file_path, 'w') as file: yaml.dump(parsed_data, file) except Exception as ex: - logging.warning("ARI yaml update fix functionality failed with: %s for file: %s", ex, file_path) + logger.warning("ARI yaml update fix functionality failed with: %s for file: %s", ex, file_path) return diff --git a/ansible_risk_insight/yaml_utils.py b/ansible_risk_insight/yaml_utils.py index c3a351b4..9eceaa16 100644 --- a/ansible_risk_insight/yaml_utils.py +++ b/ansible_risk_insight/yaml_utils.py @@ -3,7 +3,6 @@ # pylint: disable=too-many-lines from __future__ import annotations -import logging from io import StringIO from pathlib import Path from typing import TYPE_CHECKING, Any @@ -18,6 +17,7 @@ from ruamel.yaml.main import YAML from ruamel.yaml.parser import ParserError from ruamel.yaml.scalarint import HexInt, ScalarInt +import ansible_risk_insight.logger as logger if TYPE_CHECKING: @@ -26,8 +26,6 @@ from ruamel.yaml.nodes import ScalarNode from ruamel.yaml.representer import RoundTripRepresenter -_logger = logging.getLogger(__name__) - class OctalIntYAML11(ScalarInt): """OctalInt representation for YAML 1.1.""" @@ -306,7 +304,7 @@ def load(self, stream: Path | StreamTextType) -> Any: data = self.load_all(stream=text) except ParserError as ex: data = None - _logger.error( # noqa: TRY400 + logger.error( # noqa: TRY400 "Invalid yaml, verify the file contents and try again. %s", ex ) except Exception as ex: From eca1c429531795f0df4507ecd81166e4c05024d1 Mon Sep 17 00:00:00 2001 From: Sumit Jaiswal Date: Thu, 6 Jun 2024 12:47:08 +0530 Subject: [PATCH 9/9] add EOL Signed-off-by: Sumit Jaiswal --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index e05140c4..e41d7fff 100644 --- a/README.md +++ b/README.md @@ -119,4 +119,5 @@ Step 2: Fire the ARI command via cli command to run the ARI, ref as: 0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation. ``` Note: If you want to disable the validation warning, please set `PYDEVD_DISABLE_FILE_VALIDATION=1` under your enviornment. + Step 3: From VSCode, click `Run->Start Debugging`, debugger should stop at the breakpoints placed inside the ARI code.