From 1f23496f0fd88159c43923f080c0af6a991f9413 Mon Sep 17 00:00:00 2001 From: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:26:36 +0200 Subject: [PATCH] Multi-metrics throttler: adding InnoDB `history_list_length` metric (#17262) Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com> --- .../throttler_topo/throttler_test.go | 34 ++++++++++ .../tabletserver/throttle/base/metric_name.go | 1 + .../throttle/base/metric_name_test.go | 4 +- .../self_metric_innodb_history_list_length.go | 68 +++++++++++++++++++ .../tabletserver/throttle/throttler_test.go | 17 +++++ 5 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 go/vt/vttablet/tabletserver/throttle/base/self_metric_innodb_history_list_length.go diff --git a/go/test/endtoend/tabletmanager/throttler_topo/throttler_test.go b/go/test/endtoend/tabletmanager/throttler_topo/throttler_test.go index f96069c81b8..d5a1053d77d 100644 --- a/go/test/endtoend/tabletmanager/throttler_topo/throttler_test.go +++ b/go/test/endtoend/tabletmanager/throttler_topo/throttler_test.go @@ -175,6 +175,16 @@ func throttledApps(tablet *cluster.Vttablet) (resp *http.Response, respBody stri return resp, respBody, err } +func vitessThrottleCheck(tablet *cluster.Vttablet, skipRequestHeartbeats bool) (*vtctldatapb.CheckThrottlerResponse, error) { + flags := &throttle.CheckFlags{ + Scope: base.ShardScope, + SkipRequestHeartbeats: skipRequestHeartbeats, + MultiMetricsEnabled: true, + } + resp, err := throttler.CheckThrottler(clusterInstance, tablet, throttlerapp.VitessName, flags) + return resp, err +} + func throttleCheck(tablet *cluster.Vttablet, skipRequestHeartbeats bool) (*vtctldatapb.CheckThrottlerResponse, error) { flags := &throttle.CheckFlags{ Scope: base.ShardScope, @@ -305,6 +315,17 @@ func TestInitialThrottler(t *testing.T) { waitForThrottleCheckStatus(t, primaryTablet, tabletmanagerdatapb.CheckThrottlerResponseCode_THRESHOLD_EXCEEDED) }) t.Run("setting high 
threshold", func(t *testing.T) { + { + req := &vtctldatapb.UpdateThrottlerConfigRequest{MetricName: base.LoadAvgMetricName.String(), Threshold: 5555} + _, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, nil) + assert.NoError(t, err) + } + { + req := &vtctldatapb.UpdateThrottlerConfigRequest{MetricName: base.MysqldLoadAvgMetricName.String(), Threshold: 5555} + _, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, nil) + assert.NoError(t, err) + } + req := &vtctldatapb.UpdateThrottlerConfigRequest{Threshold: extremelyHighThreshold.Seconds()} _, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, nil) assert.NoError(t, err) @@ -317,6 +338,19 @@ func TestInitialThrottler(t *testing.T) { t.Run("validating OK response from throttler with high threshold", func(t *testing.T) { waitForThrottleCheckStatus(t, primaryTablet, tabletmanagerdatapb.CheckThrottlerResponseCode_OK) }) + t.Run("validating vitess app throttler check", func(t *testing.T) { + resp, err := vitessThrottleCheck(primaryTablet, true) + require.NoError(t, err) + for _, metricName := range base.KnownMetricNames { + t.Run(metricName.String(), func(t *testing.T) { + assert.Contains(t, resp.Check.Metrics, metricName.String()) + metric := resp.Check.Metrics[metricName.String()] + require.NotNil(t, metric) + assert.Equal(t, tabletmanagerdatapb.CheckThrottlerResponseCode_OK, metric.ResponseCode, "metric: %+v", metric) + }) + } + }) + t.Run("setting low threshold", func(t *testing.T) { req := &vtctldatapb.UpdateThrottlerConfigRequest{Threshold: throttler.DefaultThreshold.Seconds()} _, err := throttler.UpdateThrottlerTopoConfig(clusterInstance, req, nil, nil) diff --git a/go/vt/vttablet/tabletserver/throttle/base/metric_name.go b/go/vt/vttablet/tabletserver/throttle/base/metric_name.go index 607192b9c0c..43bd2d17a8c 100644 --- a/go/vt/vttablet/tabletserver/throttle/base/metric_name.go +++ b/go/vt/vttablet/tabletserver/throttle/base/metric_name.go @@ -65,6 +65,7 
@@ const ( ThreadsRunningMetricName MetricName = "threads_running" CustomMetricName MetricName = "custom" LoadAvgMetricName MetricName = "loadavg" + HistoryListLengthMetricName MetricName = "history_list_length" MysqldLoadAvgMetricName MetricName = "mysqld-loadavg" MysqldDatadirUsedRatioMetricName MetricName = "mysqld-datadir-used-ratio" ) diff --git a/go/vt/vttablet/tabletserver/throttle/base/metric_name_test.go b/go/vt/vttablet/tabletserver/throttle/base/metric_name_test.go index ffd7f674cc2..c2e2b44b36f 100644 --- a/go/vt/vttablet/tabletserver/throttle/base/metric_name_test.go +++ b/go/vt/vttablet/tabletserver/throttle/base/metric_name_test.go @@ -241,15 +241,17 @@ func TestKnownMetricNames(t *testing.T) { assert.Contains(t, KnownMetricNames, LoadAvgMetricName) assert.Contains(t, KnownMetricNames, CustomMetricName) assert.Contains(t, KnownMetricNames, DefaultMetricName) + assert.Contains(t, KnownMetricNames, HistoryListLengthMetricName) assert.Contains(t, KnownMetricNames, MysqldLoadAvgMetricName) assert.Contains(t, KnownMetricNames, MysqldDatadirUsedRatioMetricName) } -func TestSingleWordCamelKnownMetricNames(t *testing.T) { +func TestKnownMetricNamesPascalCase(t *testing.T) { expectCases := map[MetricName]string{ LagMetricName: "Lag", ThreadsRunningMetricName: "ThreadsRunning", LoadAvgMetricName: "Loadavg", + HistoryListLengthMetricName: "HistoryListLength", CustomMetricName: "Custom", DefaultMetricName: "Default", MysqldLoadAvgMetricName: "MysqldLoadavg", diff --git a/go/vt/vttablet/tabletserver/throttle/base/self_metric_innodb_history_list_length.go b/go/vt/vttablet/tabletserver/throttle/base/self_metric_innodb_history_list_length.go new file mode 100644 index 00000000000..2696b1750ea --- /dev/null +++ b/go/vt/vttablet/tabletserver/throttle/base/self_metric_innodb_history_list_length.go @@ -0,0 +1,68 @@ +/* +Copyright 2024 The Vitess Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package base + +import ( + "context" + "math" + "sync/atomic" + "time" +) + +var ( + historyListLengthQuery = "select count as history_len from information_schema.INNODB_METRICS where name = 'trx_rseg_history_len'" + + cachedHistoryListLengthMetric atomic.Pointer[ThrottleMetric] + historyListLengthCacheDuration = 5 * time.Second + historyListLengthDefaultThreshold = math.Pow10(9) +) + +var _ SelfMetric = registerSelfMetric(&HistoryListLengthSelfMetric{}) + +type HistoryListLengthSelfMetric struct { +} + +func (m *HistoryListLengthSelfMetric) Name() MetricName { + return HistoryListLengthMetricName +} + +func (m *HistoryListLengthSelfMetric) DefaultScope() Scope { + return SelfScope +} + +func (m *HistoryListLengthSelfMetric) DefaultThreshold() float64 { + return historyListLengthDefaultThreshold +} + +func (m *HistoryListLengthSelfMetric) RequiresConn() bool { + return true +} + +func (m *HistoryListLengthSelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric { + // This function will be called sequentially, and therefore does not need strong mutex protection. Still, we use atomics + // to ensure correctness in case an external goroutine tries to read the metric concurrently. 
+ metric := cachedHistoryListLengthMetric.Load() + if metric != nil { + return metric + } + metric = ReadSelfMySQLThrottleMetric(ctx, params.Conn, historyListLengthQuery) + cachedHistoryListLengthMetric.Store(metric) + time.AfterFunc(historyListLengthCacheDuration, func() { + cachedHistoryListLengthMetric.Store(nil) + }) + return metric +} diff --git a/go/vt/vttablet/tabletserver/throttle/throttler_test.go b/go/vt/vttablet/tabletserver/throttle/throttler_test.go index 0a1162b02d3..352e641fa35 100644 --- a/go/vt/vttablet/tabletserver/throttle/throttler_test.go +++ b/go/vt/vttablet/tabletserver/throttle/throttler_test.go @@ -71,6 +71,12 @@ var ( Value: 2.718, Err: nil, }, + base.HistoryListLengthMetricName: &base.ThrottleMetric{ + Scope: base.SelfScope, + Alias: "", + Value: 5, + Err: nil, + }, base.MysqldLoadAvgMetricName: &base.ThrottleMetric{ Scope: base.SelfScope, Alias: "", @@ -105,6 +111,11 @@ var ( ResponseCode: tabletmanagerdatapb.CheckThrottlerResponseCode_OK, Value: 5.1, }, + base.HistoryListLengthMetricName.String(): { + StatusCode: http.StatusOK, + ResponseCode: tabletmanagerdatapb.CheckThrottlerResponseCode_OK, + Value: 6, + }, base.MysqldLoadAvgMetricName.String(): { StatusCode: http.StatusOK, ResponseCode: tabletmanagerdatapb.CheckThrottlerResponseCode_OK, @@ -1853,6 +1864,7 @@ func TestChecks(t *testing.T) { assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // self value, because flags.Scope is set assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // self value, because flags.Scope is set assert.EqualValues(t, 2.718, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // self value, because flags.Scope is set + assert.EqualValues(t, 5, checkResult.Metrics[base.HistoryListLengthMetricName.String()].Value) // self value, because flags.Scope is set assert.EqualValues(t, 0.3311, checkResult.Metrics[base.MysqldLoadAvgMetricName.String()].Value) // self value, because 
flags.Scope is set assert.EqualValues(t, 0.85, checkResult.Metrics[base.MysqldDatadirUsedRatioMetricName.String()].Value) // self value, because flags.Scope is set for _, metric := range checkResult.Metrics { @@ -1914,6 +1926,7 @@ func TestChecks(t *testing.T) { assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // shard value, because flags.Scope is set assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // shard value, because flags.Scope is set assert.EqualValues(t, 5.1, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // shard value, because flags.Scope is set + assert.EqualValues(t, 6, checkResult.Metrics[base.HistoryListLengthMetricName.String()].Value) // shard value, because flags.Scope is set assert.EqualValues(t, 0.3311, checkResult.Metrics[base.MysqldLoadAvgMetricName.String()].Value) // shard value, because flags.Scope is set assert.EqualValues(t, 0.87, checkResult.Metrics[base.MysqldDatadirUsedRatioMetricName.String()].Value) // shard value, because flags.Scope is set for _, metric := range checkResult.Metrics { @@ -1948,6 +1961,7 @@ func TestChecks(t *testing.T) { assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // self value, because "self" is the default scope for threads_running assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // self value, because "self" is the default scope for custom assert.EqualValues(t, 2.718, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // self value, because "self" is the default scope for loadavg + assert.EqualValues(t, 5, checkResult.Metrics[base.HistoryListLengthMetricName.String()].Value) // self value, because "self" is the default scope for history_list_length assert.EqualValues(t, 0.3311, checkResult.Metrics[base.MysqldLoadAvgMetricName.String()].Value) // self value, because "self" is the default scope for loadavg assert.EqualValues(t, 0.85, 
checkResult.Metrics[base.MysqldDatadirUsedRatioMetricName.String()].Value) // self value, because "self" is the default scope for loadavg assert.EqualValues(t, base.ShardScope.String(), checkResult.Metrics[base.LagMetricName.String()].Scope) @@ -1970,6 +1984,7 @@ func TestChecks(t *testing.T) { base.MetricName("custom"), base.MetricName("shard/loadavg"), base.MetricName("shard/mysqld-loadavg"), + base.MetricName("self/history_list_length"), base.MetricName("self/mysqld-datadir-used-ratio"), base.MetricName("default"), } @@ -1986,6 +2001,7 @@ func TestChecks(t *testing.T) { assert.EqualValues(t, 26, checkResult.Metrics[base.ThreadsRunningMetricName.String()].Value) // shard value, even though scope name is in metric name assert.EqualValues(t, 17, checkResult.Metrics[base.CustomMetricName.String()].Value) // shard value because flags.Scope is set assert.EqualValues(t, 5.1, checkResult.Metrics[base.LoadAvgMetricName.String()].Value) // shard value, not because scope name is in metric name but because flags.Scope is set + assert.EqualValues(t, 6, checkResult.Metrics[base.HistoryListLengthMetricName.String()].Value) // shard value, even though scope name is in metric name assert.EqualValues(t, 0.3311, checkResult.Metrics[base.MysqldLoadAvgMetricName.String()].Value) // shard value, not because scope name is in metric name but because flags.Scope is set assert.EqualValues(t, 0.87, checkResult.Metrics[base.MysqldDatadirUsedRatioMetricName.String()].Value) // shard value, even though scope name is in metric name for _, metric := range checkResult.Metrics { @@ -2257,6 +2273,7 @@ func TestReplica(t *testing.T) { assert.Error(t, metricResult.Error, "metricName=%v, value=%v, threshold=%v", metricName, metricResult.Value, metricResult.Threshold) assert.ErrorIs(t, metricResult.Error, base.ErrThresholdExceeded) case base.ThreadsRunningMetricName, + base.HistoryListLengthMetricName, base.MysqldLoadAvgMetricName, base.MysqldDatadirUsedRatioMetricName: assert.NoError(t, 
metricResult.Error, "metricName=%v, value=%v, threshold=%v", metricName, metricResult.Value, metricResult.Threshold)