From 0b8a88ba7c369e7c13633e0da76d88fa18e0bc70 Mon Sep 17 00:00:00 2001 From: Will Morrison Date: Thu, 2 Jan 2025 11:35:26 -0500 Subject: [PATCH] Add a timeout for the backend stats collection query Default the stats collection query to use EXECUTE IMMEDIATE by default, with the option to use explicit PREPARE if desired --- .../ClusterStatsJdbcMonitor.java | 9 +++++++ .../ha/config/MonitorConfiguration.java | 27 +++++++++++++++++++ .../TestClusterStatsMonitor.java | 6 ++++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/gateway-ha/src/main/java/io/trino/gateway/ha/clustermonitor/ClusterStatsJdbcMonitor.java b/gateway-ha/src/main/java/io/trino/gateway/ha/clustermonitor/ClusterStatsJdbcMonitor.java index f62dacfb7..0df56f0e3 100644 --- a/gateway-ha/src/main/java/io/trino/gateway/ha/clustermonitor/ClusterStatsJdbcMonitor.java +++ b/gateway-ha/src/main/java/io/trino/gateway/ha/clustermonitor/ClusterStatsJdbcMonitor.java @@ -15,6 +15,7 @@ import com.google.common.util.concurrent.SimpleTimeLimiter; import io.airlift.log.Logger; +import io.airlift.units.Duration; import io.trino.gateway.ha.config.BackendStateConfiguration; import io.trino.gateway.ha.config.MonitorConfiguration; import io.trino.gateway.ha.config.ProxyBackendConfiguration; @@ -39,6 +40,7 @@ public class ClusterStatsJdbcMonitor private static final Logger log = Logger.get(ClusterStatsJdbcMonitor.class); private final Properties properties; // TODO Avoid using a mutable field + private final Duration queryTimeout; private static final String STATE_QUERY = "SELECT state, COUNT(*) as count " + "FROM runtime.queries " @@ -51,6 +53,12 @@ public ClusterStatsJdbcMonitor(BackendStateConfiguration backendStateConfigurati properties.setProperty("user", backendStateConfiguration.getUsername()); properties.setProperty("password", backendStateConfiguration.getPassword()); properties.setProperty("SSL", String.valueOf(backendStateConfiguration.getSsl())); + // explicitPrepare is a valid property for Trino versions >= 431. To avoid compatibility + // issues with versions < 431, this property is left unset when explicitPrepare=true, which is the default + if (!monitorConfiguration.isExplicitPrepare()) { + properties.setProperty("explicitPrepare", "false"); + } + queryTimeout = monitorConfiguration.getQueryTimeout(); log.info("state check configured"); } @@ -78,6 +86,7 @@ public ClusterStats monitor(ProxyBackendConfiguration backend) PreparedStatement statement = SimpleTimeLimiter.create(Executors.newSingleThreadExecutor()).callWithTimeout( () -> conn.prepareStatement(STATE_QUERY), 10, SECONDS)) { statement.setString(1, (String) properties.get("user")); + statement.setQueryTimeout((int) queryTimeout.roundTo(SECONDS)); Map partialState = new HashMap<>(); ResultSet rs = statement.executeQuery(); while (rs.next()) { diff --git a/gateway-ha/src/main/java/io/trino/gateway/ha/config/MonitorConfiguration.java b/gateway-ha/src/main/java/io/trino/gateway/ha/config/MonitorConfiguration.java index 6ae1e9763..0016fce98 100644 --- a/gateway-ha/src/main/java/io/trino/gateway/ha/config/MonitorConfiguration.java +++ b/gateway-ha/src/main/java/io/trino/gateway/ha/config/MonitorConfiguration.java @@ -13,14 +13,21 @@ */ package io.trino.gateway.ha.config; +import io.airlift.units.Duration; import io.trino.gateway.ha.clustermonitor.ActiveClusterMonitor; +import static java.util.concurrent.TimeUnit.SECONDS; + public class MonitorConfiguration { private int taskDelaySeconds = ActiveClusterMonitor.MONITOR_TASK_DELAY_SECONDS; private int retries; + private Duration queryTimeout = new Duration(10, SECONDS); + + private boolean explicitPrepare; + public MonitorConfiguration() {} public int getTaskDelaySeconds() @@ -42,4 +49,24 @@ public void setRetries(int retries) { this.retries = retries; } + + public Duration getQueryTimeout() + { + return queryTimeout; + } + + public void setQueryTimeout(Duration queryTimeout) + { + this.queryTimeout = queryTimeout; + } + + public boolean isExplicitPrepare() + { + return explicitPrepare; + } + + public void setExplicitPrepare(boolean explicitPrepare) + { + this.explicitPrepare = explicitPrepare; + } } diff --git a/gateway-ha/src/test/java/io/trino/gateway/ha/clustermonitor/TestClusterStatsMonitor.java b/gateway-ha/src/test/java/io/trino/gateway/ha/clustermonitor/TestClusterStatsMonitor.java index 3a9ab15be..99dc0a5af 100644 --- a/gateway-ha/src/test/java/io/trino/gateway/ha/clustermonitor/TestClusterStatsMonitor.java +++ b/gateway-ha/src/test/java/io/trino/gateway/ha/clustermonitor/TestClusterStatsMonitor.java @@ -15,6 +15,7 @@ import io.airlift.http.client.HttpClientConfig; import io.airlift.http.client.jetty.JettyHttpClient; +import io.airlift.units.Duration; import io.trino.gateway.ha.config.BackendStateConfiguration; import io.trino.gateway.ha.config.MonitorConfiguration; import io.trino.gateway.ha.config.ProxyBackendConfiguration; @@ -26,6 +27,7 @@ import java.util.function.Function; +import static java.util.concurrent.TimeUnit.SECONDS; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; import static org.testcontainers.utility.MountableFile.forClasspathResource; @@ -58,7 +60,9 @@ void testHttpMonitor() @Test void testJdbcMonitor() { - testClusterStatsMonitor(backendStateConfiguration -> new ClusterStatsJdbcMonitor(backendStateConfiguration, new MonitorConfiguration())); + MonitorConfiguration monitorConfigurationWithTimeout = new MonitorConfiguration(); + monitorConfigurationWithTimeout.setQueryTimeout(new Duration(30, SECONDS)); + testClusterStatsMonitor(backendStateConfiguration -> new ClusterStatsJdbcMonitor(backendStateConfiguration, monitorConfigurationWithTimeout)); } @Test