diff --git a/README.md b/README.md
index d409723..30c80a5 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@ vertx-cluster-watchdog
[](https://travis-ci.com/swisspush/vertx-cluster-watchdog)
[](https://codecov.io/gh/swisspost/vertx-cluster-watchdog)
-Checks if all your hazelcast cluster members are receiveing published messages over the bus.
+Checks if all your hazelcast cluster members are receiving published messages over the bus.
How to run the watchdog
-----------------------
@@ -63,3 +63,22 @@ Tests
-----
The tests try to simulate the cluster with multiple instances of the verticle. The amount of cluster members is injected over the config.
+
+Micrometer metrics
+------------------
+When enabled, `vertx-cluster-watchdog` exposes the following Micrometer metrics:
+* cluster_watchdog_members
+* cluster_watchdog_members_responded
+
+Example metrics:
+
+```
+# HELP cluster_watchdog_members Amount of members visible to the cluster
+# TYPE cluster_watchdog_members gauge
+cluster_watchdog_members 2.0
+# HELP cluster_watchdog_members_responded Amount of cluster members responded when accessed
+# TYPE cluster_watchdog_members_responded gauge
+cluster_watchdog_members_responded 2.0
+```
+
+To enable the metrics, pass a `MeterRegistry` instance to the `setMeterRegistry(MeterRegistry meterRegistry)` method of the `ClusterWatchdog` class.
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 56d29ca..9a53628 100644
--- a/pom.xml
+++ b/pom.xml
@@ -79,6 +79,11 @@
slf4j-simple
${slf4j.version}
+
+ io.micrometer
+ micrometer-core
+ ${micrometer.version}
+
junit
@@ -399,6 +404,7 @@
4.5.2
2.0.10
+ 1.12.13
2.6
2.15.1
4.4
diff --git a/src/main/java/org/swisspush/vertx/cluster/ClusterWatchdog.java b/src/main/java/org/swisspush/vertx/cluster/ClusterWatchdog.java
index 9f62416..009b399 100644
--- a/src/main/java/org/swisspush/vertx/cluster/ClusterWatchdog.java
+++ b/src/main/java/org/swisspush/vertx/cluster/ClusterWatchdog.java
@@ -1,5 +1,7 @@
package org.swisspush.vertx.cluster;
+import io.micrometer.core.instrument.Gauge;
+import io.micrometer.core.instrument.MeterRegistry;
import io.vertx.core.AbstractVerticle;
import io.vertx.core.Handler;
import io.vertx.core.Promise;
@@ -11,6 +13,7 @@
import java.text.SimpleDateFormat;
import java.util.*;
+import java.util.concurrent.atomic.AtomicLong;
public class ClusterWatchdog extends AbstractVerticle {
@@ -32,6 +35,9 @@ public class ClusterWatchdog extends AbstractVerticle {
private Map> healthCheckResponses;
private ClusterWatchdogHttpHandler clusterWatchdogHttpHandler;
+ private final AtomicLong atomicClusterMemberCountRequired = new AtomicLong(0);
+ private final AtomicLong atomicClusterMemberRespondersCount = new AtomicLong(0);
+
@Override
public void start(Promise startPromise) {
@@ -50,6 +56,7 @@ public void start(Promise startPromise) {
} else {
clusterMemberCount = clusterMemberCountFromConfig;
}
+ atomicClusterMemberRespondersCount.set(0);
int resultQueueLength = config.getInteger("resultQueueLength", 100);
log.info("ClusterWatchdog used resultQueueLength: " + resultQueueLength);
@@ -111,6 +118,15 @@ public void start(Promise startPromise) {
});
}
+ /**
+  * Registers the watchdog gauges on the given Micrometer registry.
+  * <p>
+  * Two gauges are registered: {@code cluster.watchdog.members}, which reads
+  * {@code atomicClusterMemberCountRequired}, and {@code cluster.watchdog.members.responded},
+  * which reads {@code atomicClusterMemberRespondersCount}. A {@code null} registry is ignored
+  * and nothing is registered.
+  *
+  * @param meterRegistry the registry to register the gauges on; may be {@code null}
+  */
+ public void setMeterRegistry(MeterRegistry meterRegistry) {
+     // Nothing to register against: leave metrics disabled.
+     if (meterRegistry == null) {
+         return;
+     }
+
+     Gauge.builder("cluster.watchdog.members", atomicClusterMemberCountRequired, AtomicLong::get)
+             .description("Amount of members visible to the cluster")
+             .register(meterRegistry);
+
+     Gauge.builder("cluster.watchdog.members.responded", atomicClusterMemberRespondersCount, AtomicLong::get)
+             .description("Amount of cluster members responded when accessed")
+             .register(meterRegistry);
+ }
+
class ClusterCheckHandler implements Handler {
public void handle(Long event) {
@@ -135,6 +151,8 @@ public void handle(Long event) {
return;
}
+ atomicClusterMemberCountRequired.set(clusterMemberCount);
+
// publish the broadcast event which will us get the response of all the registered handlers
eb.publish(BROADCAST, testpayload);
@@ -148,6 +166,9 @@ public void handle(Long event) {
watchdogResult.time = time;
watchdogResult.verticleId = uniqueId;
watchdogResult.clusterMemberCount = clusterMemberCount;
+
+ atomicClusterMemberRespondersCount.set(responses != null ? responses.size() : 0);
+
if(responses == null) {
log.error("ClusterWatchdog found no responses for timestamp: " + timestamp);
watchdogResult.status = ClusterHealthStatus.INCONSISTENT;