From 611a8d0af1a3f7ddfd7d2f38369c1fdf7ef493f3 Mon Sep 17 00:00:00 2001 From: Rene Peinthor Date: Fri, 13 Dec 2024 14:43:11 +0100 Subject: [PATCH] linstor: improve heartbeat check by also asking linstor If a node doesn't have a DRBD connection to another node, additionally ask the Linstor-Controller if the node is alive. Otherwise we would simply have reported the node as down, even though it might still be alive. This is always the case in a non-hyperconverged setup. --- plugins/storage/volume/linstor/CHANGELOG.md | 6 +++ .../kvm/storage/LinstorStorageAdaptor.java | 16 ++++++++ .../kvm/storage/LinstorStoragePool.java | 40 ++++++++++++++++--- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/plugins/storage/volume/linstor/CHANGELOG.md b/plugins/storage/volume/linstor/CHANGELOG.md index 930e139870f6..e1ff9a5e2691 100644 --- a/plugins/storage/volume/linstor/CHANGELOG.md +++ b/plugins/storage/volume/linstor/CHANGELOG.md @@ -5,6 +5,12 @@ All notable changes to Linstor CloudStack plugin will be documented in this file The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [2024-12-13] + +### Fixed + +- Linstor heartbeat check now also asks the linstor-controller if there is no connection between nodes + ## [2024-10-28] ### Fixed diff --git a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStorageAdaptor.java b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStorageAdaptor.java index 1f71a54a4f35..90b1342c0a27 100644 --- a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStorageAdaptor.java +++ b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStorageAdaptor.java @@ -45,6 +45,7 @@ import com.linbit.linstor.api.DevelopersApi; import com.linbit.linstor.api.model.ApiCallRc; import com.linbit.linstor.api.model.ApiCallRcList; +import com.linbit.linstor.api.model.Node; import com.linbit.linstor.api.model.Properties; import com.linbit.linstor.api.model.ProviderKind; import com.linbit.linstor.api.model.Resource; @@ -712,4 +713,19 @@ public long getUsed(LinstorStoragePool pool) { throw new CloudRuntimeException(apiEx.getBestMessage(), apiEx); } } + + public boolean isNodeOnline(LinstorStoragePool pool, String nodeName) { + DevelopersApi linstorApi = getLinstorAPI(pool); + try { + List<Node> node = linstorApi.nodeList(Collections.singletonList(nodeName), Collections.emptyList(), null, null); + if (node == null || node.isEmpty()) { + return false; + } + + return Node.ConnectionStatusEnum.ONLINE.equals(node.get(0).getConnectionStatus()); + } catch (ApiException apiEx) { + s_logger.error(apiEx.getMessage()); + throw new CloudRuntimeException(apiEx.getBestMessage(), apiEx); + } + } } diff --git a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStoragePool.java b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStoragePool.java index 4077d5dadfd0..9a9c1a9717a2 100644 --- 
a/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStoragePool.java +++ b/plugins/storage/volume/linstor/src/main/java/com/cloud/hypervisor/kvm/storage/LinstorStoragePool.java @@ -279,22 +279,52 @@ private String executeDrbdSetupStatus(OutputInterpreter.AllLinesParser parser) { return sc.execute(parser); } + private boolean checkLinstorNodeOnline(String nodeName) { + return ((LinstorStorageAdaptor)_storageAdaptor).isNodeOnline(this, nodeName); + } + + /** + * Checks the output of drbdsetup status to see whether this node has any valid connection to the specified + * otherNodeName. + * If there is no connection, ask the Linstor controller if the node is seen online and return false if not. + * If there is a connection, but it is not in state Connected (valid), return false. + * @param output Output of the drbdsetup status --json command + * @param otherNodeName Name of the node to check against + * @return true if we could say that this node thinks the node in question is reachable, otherwise false. 
+ */ private boolean checkDrbdSetupStatusOutput(String output, String otherNodeName) { JsonParser jsonParser = new JsonParser(); JsonArray jResources = (JsonArray) jsonParser.parse(output); + boolean connectionFound = false; for (JsonElement jElem : jResources) { JsonObject jRes = (JsonObject) jElem; JsonArray jConnections = jRes.getAsJsonArray("connections"); for (JsonElement jConElem : jConnections) { JsonObject jConn = (JsonObject) jConElem; - if (jConn.getAsJsonPrimitive("name").getAsString().equals(otherNodeName) - && jConn.getAsJsonPrimitive("connection-state").getAsString().equalsIgnoreCase("Connected")) { - return true; + if (jConn.getAsJsonPrimitive("name").getAsString().equals(otherNodeName)) + { + connectionFound = true; + if (jConn.getAsJsonPrimitive("connection-state").getAsString() + .equalsIgnoreCase("Connected")) { + return true; + } } } } - s_logger.warn(String.format("checkDrbdSetupStatusOutput: no resource connected to %s.", otherNodeName)); - return false; + boolean otherNodeOnline = false; + if (connectionFound) { + s_logger.warn(String.format( + "checkingHeartBeat: connection found, but not in state 'Connected' to %s", otherNodeName)); + } else { + s_logger.warn(String.format( + "checkingHeartBeat: no resource connected to %s, checking LINSTOR", otherNodeName)); + otherNodeOnline = checkLinstorNodeOnline(otherNodeName); + } + s_logger.info(String.format( + "checkingHeartBeat: other node %s is %s.", + otherNodeName, + otherNodeOnline ? "online on controller" : "down")); + return otherNodeOnline; } private String executeDrbdEventsNow(OutputInterpreter.AllLinesParser parser) {