From 5eb1774189b2eca26c60761e2cfa5568cca2664b Mon Sep 17 00:00:00 2001 From: BinaryArchaism Date: Fri, 27 Oct 2023 19:54:43 +0300 Subject: [PATCH] Feat: Changed logic of healthz metric to signal server shutdown. Added a "value" label to the healthz metric carrying the error text or "ok". Added a test for this. --- collector/collector_test.go | 24 ++++++++++++++++++++++++ collector/healthz.go | 15 ++++++++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/collector/collector_test.go b/collector/collector_test.go index a65977e..971c896 100644 --- a/collector/collector_test.go +++ b/collector/collector_test.go @@ -242,6 +242,30 @@ func TestConnz(t *testing.T) { verifyCollector(CoreSystem, url, "connz", cases, t) } +func TestHealthz(t *testing.T) { + s := pet.RunServer() + defer s.Shutdown() + + url := fmt.Sprintf("http://localhost:%d", pet.MonitorPort) + // see if we get the same stats as the original monitor testing code. + // just for our monitoring_port + + cases := map[string]float64{ + "gnatsd_healthz_status": 0, + } + + verifyCollector(CoreSystem, url, "healthz", cases, t) + + // test after server shutdown + s.Shutdown() + + cases = map[string]float64{ + "gnatsd_healthz_status": 1, + } + + verifyCollector(CoreSystem, url, "healthz", cases, t) +} + func TestNoServer(t *testing.T) { url := fmt.Sprintf("http://localhost:%d", pet.MonitorPort) diff --git a/collector/healthz.go b/collector/healthz.go index 6cb5f39..c495155 100644 --- a/collector/healthz.go +++ b/collector/healthz.go @@ -44,7 +44,7 @@ func newHealthzCollector(system, endpoint string, servers []*CollectedServer) pr status: prometheus.NewDesc( prometheus.BuildFQName(system, endpoint, "status"), "status", - []string{"server_id"}, + []string{"server_id", "value"}, nil, ), } @@ -69,16 +69,21 @@ func (nc *healthzCollector) Collect(ch chan<- prometheus.Metric) { for _, server := range nc.servers { var health Healthz if err := getMetricURL(nc.httpClient, server.URL, &health); err != nil { - Debugf("ignoring 
server %s: %v", server.ID, err) - continue + Debugf("error collecting server %s: %v", server.ID, err) + health.Error = err.Error() } - var status float64 = 1 + var ( + status float64 = 1 + value = health.Error + ) + if health.Status == "ok" { status = 0 + value = health.Status } - ch <- prometheus.MustNewConstMetric(nc.status, prometheus.GaugeValue, status, server.ID) + ch <- prometheus.MustNewConstMetric(nc.status, prometheus.GaugeValue, status, server.ID, value) } }