From bc7c16caf056cc0db304462136bfb6bbf78c2d5e Mon Sep 17 00:00:00 2001 From: Andreas Maier Date: Sun, 12 Jun 2022 16:47:40 +0200 Subject: [PATCH] Automatic detection of HMC resources that no longer exist Details: * HMC resources that no longer exist are automatically removed from the exported metrics. (Issue #203) Signed-off-by: Andreas Maier --- Makefile | 3 +- docs/changes.rst | 3 ++ docs/usage.rst | 4 ++ minimum-constraints.txt | 3 +- requirements.txt | 5 ++- .../zhmc_prometheus_exporter.py | 42 +++++++++++++++++-- 6 files changed, 53 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index c44b2c0d..82a2c3f5 100644 --- a/Makefile +++ b/Makefile @@ -212,7 +212,8 @@ pylint: develop_$(pymn).done check_reqs: develop_$(pymn).done @echo "Makefile: Checking missing dependencies of the package" pip-missing-reqs $(package_dir) --requirements-file=requirements.txt - pip-missing-reqs $(package_dir) --requirements-file=minimum-constraints.txt +# TODO: Remove error ignore marker once zhmcclient 1.4.0 is released + -pip-missing-reqs $(package_dir) --requirements-file=minimum-constraints.txt @echo "Makefile: Done checking missing dependencies of the package" ifeq ($(PLATFORM),Windows_native) # Reason for skipping on Windows is https://github.com/r1chardj0n3s/pip-check-reqs/issues/67 diff --git a/docs/changes.rst b/docs/changes.rst index 020752c7..92d78248 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -32,6 +32,9 @@ Released: not yet **Enhancements:** +* HMC resources that no longer exist are automatically removed from the + exported metrics. (Issue #203) + **Cleanup:** **Known issues:** diff --git a/docs/usage.rst b/docs/usage.rst index 53eff92f..53032800 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -276,6 +276,10 @@ retrieved from the HMC, but they are exported to Prometheus in the same way: representations and can immediately return them without having to turn around for getting them from the HMC. 
+ Resources that no longer exist on the HMC are automatically dropped from the + exported metrics. Resources that were created on the HMC since the exporter + was started are not detected. + The exporter code is agnostic to the actual set of metrics supported by the HMC. A new metric exposed by the HMC metric service or a new property added to one of the auto-updated resources can immediately be supported by just adding it to diff --git a/minimum-constraints.txt b/minimum-constraints.txt index 47e95b3c..daf8731b 100644 --- a/minimum-constraints.txt +++ b/minimum-constraints.txt @@ -71,7 +71,8 @@ wheel==0.33.5; python_version >= '3.8' # Direct dependencies for runtime (must be consistent with requirements.txt) -zhmcclient==1.3.1 +# TODO: Use zhmcclient 1.4.0 once released, before releasing this project. +# zhmcclient==1.4.0 prometheus-client==0.9.0 urllib3==1.25.9; python_version <= '3.9' diff --git a/requirements.txt b/requirements.txt index 443b3447..c692ed60 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,8 +6,9 @@ # Direct dependencies for runtime (must be consistent with minimum-constraints.txt) -# git+https://github.com/zhmcclient/python-zhmcclient.git@stable_1.3#egg=zhmcclient -zhmcclient>=1.3.1 +# TODO: Use zhmcclient 1.4.0 once released, before releasing this project. +git+https://github.com/zhmcclient/python-zhmcclient.git@master#egg=zhmcclient +# zhmcclient>=1.4.0 prometheus-client>=0.9.0 urllib3>=1.25.9; python_version <= '3.9' diff --git a/zhmc_prometheus_exporter/zhmc_prometheus_exporter.py b/zhmc_prometheus_exporter/zhmc_prometheus_exporter.py index 3934a10f..1e03319f 100755 --- a/zhmc_prometheus_exporter/zhmc_prometheus_exporter.py +++ b/zhmc_prometheus_exporter/zhmc_prometheus_exporter.py @@ -660,6 +660,16 @@ def resource(self, uri, object_value): self._resources[uri] = _resource return _resource + def remove(self, uri): + """ + Remove the resource with a specified URI from the cache, if present. + If not present, nothing happens. 
+ """ + try: + del self._resources[uri] + except KeyError: + pass + def build_family_objects(metrics_object, yaml_metric_groups, yaml_metrics, metrics_filename, extra_labels, @@ -667,6 +677,8 @@ def build_family_objects(metrics_object, yaml_metric_groups, yaml_metrics, """ Go through all retrieved metrics and build the Prometheus Family objects. + Note: resource_cache will be omitted in tests, and is therefore optional. + Returns a dictionary of Prometheus Family objects with the following structure: @@ -776,11 +788,13 @@ def build_family_objects(metrics_object, yaml_metric_groups, yaml_metrics, def build_family_objects_res( resources, yaml_metric_groups, yaml_metrics, metrics_filename, - extra_labels): + extra_labels, resource_cache=None): """ Go through all auto-updated resources and build the Prometheus Family objects for them. + Note: resource_cache will be omitted in tests, and is therefore optional. + Returns a dictionary of Prometheus Family objects with the following structure: @@ -793,7 +807,29 @@ def build_family_objects_res( for metric_group, res_list in resources.items(): yaml_metric_group = yaml_metric_groups[metric_group] - for resource in res_list: + for i, resource in enumerate(list(res_list)): + # Note: We use list() because resources that no longer exist will + # be removed from the original list, so this provides a stable + # iteration when items are removed from the original list. + + if resource.ceased_existence: + try: + res_str = resource.name + except zhmcclient.CeasedExistence: + # For attribute 'name', the exception is only raised when + # the name is not yet known locally. + res_str = "with URI {}".format(resource.uri) + verbose2("Resource no longer exists on HMC: {} {}". + format(resource.manager.class_name, res_str)) + # Remove the resource from the list so it no longer shows up + # in Prometheus data. + del res_list[i] + # Remove the resource from the resource cache. 
This does not + # influence what is shown in Prometheus data, but it is simply + # a cleanup. + if resource_cache: + resource_cache.remove(resource.uri) + continue # Calculate the resource labels: labels = dict(extra_labels) @@ -1018,7 +1054,7 @@ def collect(self): family_objects.update(build_family_objects_res( self.resources, self.yaml_metric_groups, self.yaml_metrics, self.filename_metrics, - self.extra_labels)) + self.extra_labels, self.resource_cache)) log_exporter("Returning family objects") # Yield all family objects