From 087c84f0dc73109cafe355f7385f3f21580b627c Mon Sep 17 00:00:00 2001 From: Andreas Maier Date: Sun, 12 Jun 2022 16:47:40 +0200 Subject: [PATCH] Automatic detection of HMC resources that no longer exist Details: * HMC resources that no longer exist are automatically removed from the exported metrics. (Issue #203) Signed-off-by: Andreas Maier --- docs/changes.rst | 3 ++ docs/usage.rst | 4 +++ minimum-constraints.txt | 3 +- requirements.txt | 5 +-- .../zhmc_prometheus_exporter.py | 32 +++++++++++++++++-- 5 files changed, 41 insertions(+), 6 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 020752c7..92d78248 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -32,6 +32,9 @@ Released: not yet **Enhancements:** +* HMC resources that no longer exist are automatically removed from the + exported metrics. (Issue #203) + **Cleanup:** **Known issues:** diff --git a/docs/usage.rst b/docs/usage.rst index 53eff92f..23008a91 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -276,6 +276,10 @@ retrieved from the HMC, but they are exported to Prometheus in the same way: representations and can immediately return them without having to turn around for getting them from the HMC. + Resources that no longer exist on the HMC are automatically not exported + anymore. Resources that were created on the HMC since the exporter was + started are not detected. + The exporter code is agnostic to the actual set of metrics supported by the HMC. 
A new metric exposed by the HMC metric service or a new property added to one of the auto-updated resources can immediately be supported by just adding it to diff --git a/minimum-constraints.txt b/minimum-constraints.txt index 47e95b3c..daf8731b 100644 --- a/minimum-constraints.txt +++ b/minimum-constraints.txt @@ -71,7 +71,8 @@ wheel==0.33.5; python_version >= '3.8' # Direct dependencies for runtime (must be consistent with requirements.txt) -zhmcclient==1.3.1 +# TODO: Use zhmcclient 1.4.0 once released, before releasing this project. +# zhmcclient==1.4.0 prometheus-client==0.9.0 urllib3==1.25.9; python_version <= '3.9' diff --git a/requirements.txt b/requirements.txt index 443b3447..c692ed60 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,8 +6,9 @@ # Direct dependencies for runtime (must be consistent with minimum-constraints.txt) -# git+https://github.com/zhmcclient/python-zhmcclient.git@stable_1.3#egg=zhmcclient -zhmcclient>=1.3.1 +# TODO: Use zhmcclient 1.4.0 once released, before releasing this project. +git+https://github.com/zhmcclient/python-zhmcclient.git@master#egg=zhmcclient +# zhmcclient>=1.4.0 prometheus-client>=0.9.0 urllib3>=1.25.9; python_version <= '3.9' diff --git a/zhmc_prometheus_exporter/zhmc_prometheus_exporter.py b/zhmc_prometheus_exporter/zhmc_prometheus_exporter.py index 3934a10f..b9265ca3 100755 --- a/zhmc_prometheus_exporter/zhmc_prometheus_exporter.py +++ b/zhmc_prometheus_exporter/zhmc_prometheus_exporter.py @@ -660,6 +660,16 @@ def resource(self, uri, object_value): self._resources[uri] = _resource return _resource + # def remove(self, uri): + # """ + # Remove the resource with a specified URI from the cache, if present. + # If not present, nothing happens. 
+ # """ + # try: + # del self._resources[uri] + # except KeyError: + # pass + def build_family_objects(metrics_object, yaml_metric_groups, yaml_metrics, metrics_filename, extra_labels, @@ -776,7 +786,8 @@ def build_family_objects(metrics_object, yaml_metric_groups, yaml_metrics, def build_family_objects_res( resources, yaml_metric_groups, yaml_metrics, metrics_filename, - extra_labels): + extra_labels, resource_cache=None): + # pylint: disable=unused-argument """ Go through all auto-updated resources and build the Prometheus Family objects for them. @@ -793,7 +804,22 @@ def build_family_objects_res( for metric_group, res_list in resources.items(): yaml_metric_group = yaml_metric_groups[metric_group] - for resource in res_list: + for i, resource in enumerate(list(res_list)): + # Note: We use list() because resources that no longer exist will + # be removed from the original list, so this provides a stable + # iteration when items are removed from the original list. + + if resource.ceased_existence: + try: + res_str = resource.name + except zhmcclient.CeasedExistence: + # For attribute 'name', the exception is only raised when + # the name is not yet known locally. + res_str = "with URI {}".format(resource.uri) + verbose2("Resource no longer exists on HMC: {} {}". + format(resource.manager.class_name, res_str)) + del res_list[i] + continue # Calculate the resource labels: labels = dict(extra_labels) @@ -1018,7 +1044,7 @@ def collect(self): family_objects.update(build_family_objects_res( self.resources, self.yaml_metric_groups, self.yaml_metrics, self.filename_metrics, - self.extra_labels)) + self.extra_labels, self.resource_cache)) log_exporter("Returning family objects") # Yield all family objects