diff --git a/lib/health/query.py b/lib/health/query.py index 260aa94c..1d16e6e4 100644 --- a/lib/health/query.py +++ b/lib/health/query.py @@ -407,13 +407,13 @@ stop_writes = select "stop_writes" from NAMESPACE.STATISTICS; stop_writes = group by CLUSTER, NAMESPACE stop_writes; ASSERT(stop_writes, False, "Namespace has hit stop-writes (stop_writes = true)", "OPERATIONS" , CRITICAL, - "Listed namespace(s) have hit stop-write. Please run 'show statistics namespace like stop_writes' for details.", + "Listed namespace(s) have hit stop-write. Please run 'show stop-writes' for details.", "Namespace stop-writes flag check."); clock_skew_stop_writes = select "clock_skew_stop_writes" from NAMESPACE.STATISTICS; clock_skew_stop_writes = group by CLUSTER, NAMESPACE clock_skew_stop_writes; ASSERT(clock_skew_stop_writes, False, "Namespace has hit clock-skew-stop-writes (clock_skew_stop_writes = true)", "OPERATIONS" , CRITICAL, - "Listed namespace(s) have hit clock-skew-stop-writes. Please run 'show statistics namespace like clock_skew_stop_writes' for details.", + "Listed namespace(s) have hit clock-skew-stop-writes. Please run 'show stop-writes' for details.", "Namespace clock-skew-stop-writes flag check."); SET CONSTRAINT VERSION < 4.3; diff --git a/lib/utils/common.py b/lib/utils/common.py index 84eae61f..ee456f53 100644 --- a/lib/utils/common.py +++ b/lib/utils/common.py @@ -1647,13 +1647,22 @@ def _create_stop_writes_entry( node_sw_metrics[(namespace, set_, metric)] = entry -@staticmethod def _is_stop_writes_cause( - usage: int | float, threshold: int | float, stop_writes: str | None = None + usage: int | float, + threshold: int | float, + stop_writes: str | None = None, + invert: bool = False, ): if threshold == 0: return False + if invert: + return ( + True + if usage <= threshold + and (stop_writes is None or stop_writes.lower() == "true") + else False + ) return ( True if usage >= threshold and (stop_writes is None or stop_writes.lower() == "true") @@ -1661,27 +1670,57 @@ def _is_stop_writes_cause( ) -@staticmethod +def _get_first_value_from_dict_with_key( + dict_: dict[str, Any], + key: str | tuple, + default_value: Any = None, + return_type: type = str, +) -> tuple[Any, Any]: + if isinstance(key, str): + key = (key,) + + for key in key: + if key in dict_: + return key, util.get_value_from_dict( + dict_, key, default_value=default_value, return_type=return_type + ) + + return None, None + + def _format_ns_stop_writes_metrics( stop_writes_metrics: StopWritesDict, service_stats, ns_stats, ): for node in service_stats: - cluster_clock_skew_ms = service_stats[node].get("cluster_clock_skew_ms", None) - cluster_clock_skew_stop_writes_sec = service_stats[node].get( - "cluster_clock_skew_stop_writes_sec", None + cluster_clock_skew_ms: int | None = util.get_value_from_dict( + service_stats[node], + "cluster_clock_skew_ms", + None, + return_type=int, + ) + cluster_clock_skew_stop_writes_sec: int | None = util.get_value_from_dict( + service_stats[node], + "cluster_clock_skew_stop_writes_sec", + None, + return_type=int, + ) + system_free_mem_pct: int | None = util.get_value_from_dict( + service_stats[node], + "system_free_mem_pct", + None, + return_type=int, ) - system_free_mem_pct = service_stats[node].get("system_free_mem_pct", None) for ns, stats in ns_stats.get(node, {}).items(): # There is no config for this trigger strong_consistency: str | None = stats.get("strong-consistency", None) nsup_period: str | None = stats.get("nsup-period", None) stop_writes: str | None = stats.get("clock_skew_stop_writes", None) - metric: str = "cluster_clock_skew_ms" - usage = cluster_clock_skew_ms - threshold = cluster_clock_skew_stop_writes_sec + metric = "cluster_clock_skew_ms" + usage: int | float | None = cluster_clock_skew_ms + threshold: int | float | None = cluster_clock_skew_stop_writes_sec """ For Available mode (AP) namespaces running versions 4.5.1 or above and where @@ -1694,34 +1733,32 @@ def _format_ns_stop_writes_metrics( and nsup_period is not None # nsup-period was added in 4.5.1. and nsup_period != "0" ): - thresh = 40000 + threshold = 40000 else: - thresh = ( - int(cluster_clock_skew_stop_writes_sec) * 1000 - ) # convert to ms - use = int(usage) - sw = _is_stop_writes_cause(use, thresh, stop_writes) + threshold = int(threshold) * 1000 # convert to ms + + sw = _is_stop_writes_cause(usage, threshold, stop_writes) _create_stop_writes_entry( stop_writes_metrics[node], metric, - use, + usage, sw, - thresh, + threshold, namespace=ns, ) stop_writes: str | None = stats.get("stop_writes", None) metric = "system_free_mem_pct" config = "stop-writes-sys-memory-pct" - threshold: str | None = stats.get(config, None) + threshold = util.get_value_from_dict(stats, config, None, return_type=int) if ( threshold is not None and system_free_mem_pct is not None and stop_writes is not None ): - thresh = int(threshold) - use = 100 - int(system_free_mem_pct) + thresh = threshold + use = 100 - system_free_mem_pct sw = _is_stop_writes_cause(use, thresh, stop_writes) _create_stop_writes_entry( stop_writes_metrics[node], @@ -1740,70 +1777,87 @@ def _format_ns_stop_writes_metrics( if stop_writes is None: continue - metric = "device_avail_pct" - config = "min-avail-pct" - usage: str | None = stats.get(metric, None) - threshold: str | None = stats.get(config, None) - - if usage is None: - metric = "pmem_avail_pct" - usage = stats.get(metric, None) + metric, usage = _get_first_value_from_dict_with_key( + stats, + ("data_avail_pct", "device_available_pct", "pmem_available_pct"), + default_value=None, + return_type=int, + ) + config, threshold = _get_first_value_from_dict_with_key( + stats, + ( + "storage-engine.stop-writes-avail-pct", + "storage-engine.min-avail-pct", + ), + default_value=None, + return_type=int, + ) if usage is not None and threshold is not None: - use = int(usage) - thresh = int(threshold) - sw = _is_stop_writes_cause(use, thresh, stop_writes) + sw = _is_stop_writes_cause(usage, threshold, stop_writes, invert=True) _create_stop_writes_entry( stop_writes_metrics[node], metric, - use, + usage, sw, - thresh, + threshold, config=config, namespace=ns, ) - metric = "device_used_bytes" - config = "max-used-pct" - usage: str | None = stats.get(metric, None) - bytes_total: str | None = stats.get("device_total_bytes", None) - threshold: str | None = stats.get(config, None) - - if usage is None: - metric = "pmem_used_bytes" - usage = stats.get(metric, None) - bytes_total = stats.get("pmem_total_bytes", None) + metric, usage = _get_first_value_from_dict_with_key( + stats, + ("data_used_bytes", "device_used_bytes", "pmem_used_bytes"), + default_value=None, + return_type=int, + ) + config, threshold = _get_first_value_from_dict_with_key( + stats, + ("storage-engine.stop-writes-used-pct", "storage-engine.max-used-pct"), + default_value=None, + return_type=int, + ) + bytes_total: int | float | None = util.get_value_from_dict( + stats, + ("data_total_bytes", "device_total_bytes", "pmem_total_bytes"), + None, + return_type=int, + ) if usage is not None and threshold is not None and bytes_total is not None: - use = int(usage) - thresh = int(bytes_total) * (int(threshold) / 100) - sw = _is_stop_writes_cause(use, thresh, stop_writes) + threshold = bytes_total * (threshold / 100) + sw = _is_stop_writes_cause(usage, threshold, stop_writes) _create_stop_writes_entry( stop_writes_metrics[node], metric, - use, + usage, sw, - thresh, + threshold, config=config, namespace=ns, ) metric = "memory_used_bytes" config = "stop-writes-pct" - usage: str | None = stats.get(metric, None) - bytes_total: str | None = stats.get("memory-size", None) - threshold: str | None = stats.get(config, None) + usage = util.get_value_from_dict( + stats, metric, default_value=None, return_type=int + ) + bytes_total = util.get_value_from_dict( + stats, "memory-size", default_value=None, return_type=int + ) + threshold = util.get_value_from_dict( + stats, config, default_value=None, return_type=int + ) if usage is not None and threshold is not None and bytes_total is not None: - use = int(usage) - thresh = int(bytes_total) * (int(threshold) / 100) - sw = _is_stop_writes_cause(use, thresh, stop_writes) + threshold = bytes_total * (threshold / 100) + sw = _is_stop_writes_cause(usage, threshold, stop_writes) _create_stop_writes_entry( stop_writes_metrics[node], metric, - use, + usage, sw, - thresh, + threshold, config=config, namespace=ns, ) diff --git a/lib/view/sheet/decleration.py b/lib/view/sheet/decleration.py index 2a73b121..96546fd7 100644 --- a/lib/view/sheet/decleration.py +++ b/lib/view/sheet/decleration.py @@ -147,16 +147,19 @@ def fun(edata: EntryValue): return fun @staticmethod - def _fmt_pct_type(val: float): + def _fmt_pct_type(val: float, invert: bool = False): + if invert: + val = 100 - val + return str(round(float(val), 2)) + " %" @staticmethod - def ratio_to_pct(edata: EntryValue): - return Converters._fmt_pct_type(edata.value * 100) + def ratio_to_pct(edata: EntryValue, invert: bool = False): + return Converters._fmt_pct_type(edata.value * 100, invert) @staticmethod - def pct(edata: EntryValue): - return Converters._fmt_pct_type(edata.value) + def pct(edata: EntryValue, invert: bool = False): + return Converters._fmt_pct_type(edata.value, invert) FormatterPredicateFnType = Callable[[EntryData], bool] diff --git a/lib/view/templates.py b/lib/view/templates.py index a2d30f11..9f20e3ce 100644 --- a/lib/view/templates.py +++ b/lib/view/templates.py @@ -1677,8 +1677,14 @@ def stop_writes_converter_selector(edata: EntryData): return None metric = edata.record["Metric"] + val = "" if "pct" in metric: + if "avail" in metric: + val = Converters.pct(edata, invert=True) + val = "(inverted) " + val + return val + return Converters.pct(edata) if "bytes" in metric: return Converters.byte(edata) @@ -1688,6 +1694,25 @@ def stop_writes_converter_selector(edata: EntryData): return Converters.scientific_units(edata) +class StopWritesUsagePctProjector(Projectors.Number): + def __init__(self, source, *keys, **kwargs): + """ + Keyword Arguments: + invert -- False by default, if True will return 100 - value. + """ + super().__init__(source, *keys, **kwargs) + self.invert = kwargs.get("invert", False) + + def do_project(self, sheet, sources): + data = sources.get("stop_writes", ((), {}))[1] + val = super().do_project(sheet, sources) + + if "metric" in data and "avail" in data["metric"]: + val = 100 - val + + return _ignore_zero(val) + + sw_row_yellow_format = ( Formatters.yellow_alert(lambda edata: edata.record["Stop-Writes"] == True), ) @@ -1738,12 +1763,8 @@ def stop_writes_converter_selector(edata: EntryData): Field( "Usage%", Projectors.Div( - Projectors.Number("stop_writes", "metric_usage"), - Projectors.Func( - FieldType.number, - _ignore_zero, - Projectors.Number("stop_writes", "metric_threshold"), - ), + StopWritesUsagePctProjector("stop_writes", "metric_usage"), + StopWritesUsagePctProjector("stop_writes", "metric_threshold"), ), converter=Converters.ratio_to_pct, formatters=sw_val_red_format + sw_val_yellow_format + sw_row_yellow_format, diff --git a/test/unit/utils/test_common.py b/test/unit/utils/test_common.py index d03bae57..64e36178 100644 --- a/test/unit/utils/test_common.py +++ b/test/unit/utils/test_common.py @@ -786,25 +786,25 @@ def create_tc( } }, ), - # stop_writes not triggered by device_avail_pct + # stop_writes not triggered by device_available_pct create_tc( ns_stats={ "1.1.1.1": { "ns1": { "stop_writes": "true", - "device_avail_pct": "50", + "device_available_pct": "50", } }, }, ns_config={ - "1.1.1.1": {"ns1": {"min-avail-pct": "55"}}, + "1.1.1.1": {"ns1": {"storage-engine.min-avail-pct": "55"}}, }, expected={ "1.1.1.1": { - ("ns1", None, "device_avail_pct"): { - "metric": "device_avail_pct", - "config": "min-avail-pct", - "stop_writes": False, + ("ns1", None, "device_available_pct"): { + "metric": "device_available_pct", + "config": "storage-engine.min-avail-pct", + "stop_writes": True, "metric_usage": 50, "metric_threshold": 55, "namespace": "ns1", @@ -817,19 +817,19 @@ def create_tc( "1.1.1.1": { "ns1": { "stop_writes": "true", - "device_avail_pct": "56", + "device_available_pct": "56", } }, }, ns_config={ - "1.1.1.1": {"ns1": {"min-avail-pct": "55"}}, + "1.1.1.1": {"ns1": {"storage-engine.min-avail-pct": "55"}}, }, expected={ "1.1.1.1": { - ("ns1", None, "device_avail_pct"): { - "metric": "device_avail_pct", - "config": "min-avail-pct", - "stop_writes": True, + ("ns1", None, "device_available_pct"): { + "metric": "device_available_pct", + "config": "storage-engine.min-avail-pct", + "stop_writes": False, "metric_usage": 56, "metric_threshold": 55, "namespace": "ns1", @@ -842,19 +842,19 @@ def create_tc( "1.1.1.1": { "ns1": { "stop_writes": "true", - "pmem_avail_pct": "56", + "pmem_available_pct": "56", } }, }, ns_config={ - "1.1.1.1": {"ns1": {"min-avail-pct": "55"}}, + "1.1.1.1": {"ns1": {"storage-engine.min-avail-pct": "55"}}, }, expected={ "1.1.1.1": { - ("ns1", None, "pmem_avail_pct"): { - "metric": "pmem_avail_pct", - "config": "min-avail-pct", - "stop_writes": True, + ("ns1", None, "pmem_available_pct"): { + "metric": "pmem_available_pct", + "config": "storage-engine.min-avail-pct", + "stop_writes": False, "metric_usage": 56, "metric_threshold": 55, "namespace": "ns1", @@ -862,24 +862,51 @@ def create_tc( } }, ), + # stop_writes triggered by data_avail_pct create_tc( ns_stats={ "1.1.1.1": { "ns1": { "stop_writes": "true", - "pmem_avail_pct": "56", + "data_avail_pct": "55", } }, }, ns_config={ - "1.1.1.1": {"ns1": {"min-avail-pct": "55"}}, + "1.1.1.1": {"ns1": {"storage-engine.stop-writes-avail-pct": "56"}}, }, expected={ "1.1.1.1": { - ("ns1", None, "pmem_avail_pct"): { - "metric": "pmem_avail_pct", - "config": "min-avail-pct", + ("ns1", None, "data_avail_pct"): { + "metric": "data_avail_pct", + "config": "storage-engine.stop-writes-avail-pct", "stop_writes": True, + "metric_usage": 55, + "metric_threshold": 56, + "namespace": "ns1", + }, + } + }, + ), + # stop_writes not triggered by data_avail_pct + create_tc( + ns_stats={ + "1.1.1.1": { + "ns1": { + "stop_writes": "true", + "data_avail_pct": "56", + } + }, + }, + ns_config={ + "1.1.1.1": {"ns1": {"storage-engine.min-avail-pct": "55"}}, + }, + expected={ + "1.1.1.1": { + ("ns1", None, "data_avail_pct"): { + "metric": "data_avail_pct", + "config": "storage-engine.min-avail-pct", + "stop_writes": False, "metric_usage": 56, "metric_threshold": 55, "namespace": "ns1", @@ -899,13 +926,13 @@ def create_tc( }, }, ns_config={ - "1.1.1.1": {"ns1": {"max-used-pct": "90"}}, + "1.1.1.1": {"ns1": {"storage-engine.max-used-pct": "90"}}, }, expected={ "1.1.1.1": { ("ns1", None, "device_used_bytes"): { "metric": "device_used_bytes", - "config": "max-used-pct", + "config": "storage-engine.max-used-pct", "stop_writes": False, "metric_usage": 10, "metric_threshold": 90, @@ -926,13 +953,13 @@ def create_tc( }, }, ns_config={ - "1.1.1.1": {"ns1": {"max-used-pct": "90"}}, + "1.1.1.1": {"ns1": {"storage-engine.max-used-pct": "90"}}, }, expected={ "1.1.1.1": { ("ns1", None, "device_used_bytes"): { "metric": "device_used_bytes", - "config": "max-used-pct", + "config": "storage-engine.max-used-pct", "stop_writes": True, "metric_usage": 90, "metric_threshold": 90, @@ -953,13 +980,13 @@ def create_tc( }, }, ns_config={ - "1.1.1.1": {"ns1": {"max-used-pct": "90"}}, + "1.1.1.1": {"ns1": {"storage-engine.max-used-pct": "90"}}, }, expected={ "1.1.1.1": { ("ns1", None, "pmem_used_bytes"): { "metric": "pmem_used_bytes", - "config": "max-used-pct", + "config": "storage-engine.max-used-pct", "stop_writes": False, "metric_usage": 10, "metric_threshold": 90, @@ -980,13 +1007,67 @@ def create_tc( }, }, ns_config={ - "1.1.1.1": {"ns1": {"max-used-pct": "90"}}, + "1.1.1.1": {"ns1": {"storage-engine.max-used-pct": "90"}}, }, expected={ "1.1.1.1": { ("ns1", None, "pmem_used_bytes"): { "metric": "pmem_used_bytes", - "config": "max-used-pct", + "config": "storage-engine.max-used-pct", + "stop_writes": True, + "metric_usage": 90, + "metric_threshold": 90, + "namespace": "ns1", + }, + } + }, + ), + # stop_writes not triggered by data_used_bytes + create_tc( + ns_stats={ + "1.1.1.1": { + "ns1": { + "stop_writes": "true", + "data_used_bytes": "10", + "data_total_bytes": "100", + } + }, + }, + ns_config={ + "1.1.1.1": {"ns1": {"storage-engine.stop-writes-used-pct": "90"}}, + }, + expected={ + "1.1.1.1": { + ("ns1", None, "data_used_bytes"): { + "metric": "data_used_bytes", + "config": "storage-engine.stop-writes-used-pct", + "stop_writes": False, + "metric_usage": 10, + "metric_threshold": 90, + "namespace": "ns1", + }, + } + }, + ), + # stop_writes is triggered by data_used_bytes + create_tc( + ns_stats={ + "1.1.1.1": { + "ns1": { + "stop_writes": "true", + "data_used_bytes": "90", + "data_total_bytes": "100", + } + }, + }, + ns_config={ + "1.1.1.1": {"ns1": {"storage-engine.stop-writes-used-pct": "90"}}, + }, + expected={ + "1.1.1.1": { + ("ns1", None, "data_used_bytes"): { + "metric": "data_used_bytes", + "config": "storage-engine.stop-writes-used-pct", "stop_writes": True, "metric_usage": 90, "metric_threshold": 90,