-
Notifications
You must be signed in to change notification settings - Fork 2.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Tablet throttler: read and use MySQL host metrics #16904
Changes from 28 commits
71d98fe
9922731
214db0d
5660b77
2ec9f75
0741a2b
0b3666b
20e0577
df99856
a5d76d3
098e6a4
338c52c
4250909
07a3461
2c48d36
0d73d30
e3fccae
757cada
7dd4b8b
73fd43f
9166093
545112f
72b09b9
91d2e42
38b9d73
adfcf6d
d8b6ec2
6726285
79b964f
838e610
9d9d551
e25437a
14bae55
4c097f9
80891a0
3b45b2b
6a37aa2
5675ea8
ceb2f81
50f7f27
c2c3384
b3f3199
8adc7eb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
/* | ||
Copyright 2024 The Vitess Authors. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
*/ | ||
|
||
package base | ||
|
||
import ( | ||
"context" | ||
"errors" | ||
"fmt" | ||
"sync/atomic" | ||
"time" | ||
|
||
"vitess.io/vitess/go/timer" | ||
|
||
tabletmanagerdatapb "vitess.io/vitess/go/vt/proto/tabletmanagerdata" | ||
) | ||
|
||
var ( | ||
mysqlHostMetricsRpcTimeout = 5 * time.Second | ||
mysqlHostMetricsRateLimit = 10 * time.Second | ||
mysqlHostMetricsRateLimiter atomic.Pointer[timer.RateLimiter] | ||
lastMySQLHostMetricsResponse atomic.Pointer[tabletmanagerdatapb.MysqlHostMetricsResponse] | ||
) | ||
|
||
// getMysqlMetricsRateLimiter returns a rate limiter that is active until the given context is cancelled. | ||
// This function will be called sequentially, but nonetheless it offers _some_ concurrent safety. Namely, | ||
// that a created rate limiter is guaranteed to be cleaned up | ||
func getMysqlMetricsRateLimiter(ctx context.Context, rateLimit time.Duration) *timer.RateLimiter { | ||
rateLimiter := mysqlHostMetricsRateLimiter.Load() | ||
if rateLimiter == nil { | ||
rateLimiter = timer.NewRateLimiter(rateLimit) | ||
go func() { | ||
defer mysqlHostMetricsRateLimiter.Store(nil) | ||
defer rateLimiter.Stop() | ||
<-ctx.Done() | ||
}() | ||
mysqlHostMetricsRateLimiter.Store(rateLimiter) | ||
} | ||
return rateLimiter | ||
} | ||
|
||
// readMysqlHostMetrics reads MySQL host metrics sporadically from the tablet manager (which in turn reads | ||
// them from mysql deamon). The metrics are then cached, whether successful or not. | ||
// This idea is that is is very wasteful to read these metrics for every single query. E.g. right now the throttler | ||
// can issue 4 reads per second, which is wasteful to go through two RPCs to get the disk space usage for example. Even the load | ||
// average on the MySQL server is not that susceptible to change. | ||
func readMysqlHostMetrics(ctx context.Context, params *SelfMetricReadParams) error { | ||
if params.TmClient == nil { | ||
return fmt.Errorf("tmClient is nil") | ||
} | ||
if params.TabletInfo == nil { | ||
return fmt.Errorf("tabletInfo is nil") | ||
} | ||
rateLimiter := getMysqlMetricsRateLimiter(ctx, mysqlHostMetricsRateLimit) | ||
err := rateLimiter.Do(func() error { | ||
ctx, cancel := context.WithTimeout(ctx, mysqlHostMetricsRpcTimeout) | ||
defer cancel() | ||
|
||
resp, err := params.TmClient.MysqlHostMetrics(ctx, params.TabletInfo.Tablet, &tabletmanagerdatapb.MysqlHostMetricsRequest{}) | ||
if err != nil { | ||
return err | ||
} | ||
lastMySQLHostMetricsResponse.Store(resp) | ||
return nil | ||
}) | ||
return err | ||
} | ||
|
||
// getMysqlHostMetric gets a metric from the last read MySQL host metrics. The metric will either be directly read from | ||
// tablet manager (which then reads it from the mysql deamon), or from the cache. | ||
func getMysqlHostMetric(ctx context.Context, params *SelfMetricReadParams, mysqlHostMetricName string) *ThrottleMetric { | ||
metric := &ThrottleMetric{ | ||
Scope: SelfScope, | ||
} | ||
if err := readMysqlHostMetrics(ctx, params); err != nil { | ||
return metric.WithError(err) | ||
} | ||
resp := lastMySQLHostMetricsResponse.Load() | ||
if resp == nil { | ||
return metric.WithError(ErrNoResultYet) | ||
} | ||
mysqlMetric := resp.HostMetrics.Metrics[mysqlHostMetricName] | ||
if mysqlMetric == nil { | ||
return metric.WithError(ErrNoSuchMetric) | ||
} | ||
metric.Value = mysqlMetric.Value | ||
if mysqlMetric.Error != nil { | ||
metric.Err = errors.New(mysqlMetric.Error.Message) | ||
} | ||
return metric | ||
} | ||
|
||
var _ SelfMetric = registerSelfMetric(&MysqldLoadAvgSelfMetric{}) | ||
var _ SelfMetric = registerSelfMetric(&MysqldDatadirUsedRatioSelfMetric{}) | ||
|
||
// MysqldLoadAvgSelfMetric stands for the load average per cpu, on the MySQL host. | ||
// The type carries no state; its Read method looks up the "loadavg" entry of the MySQL host metrics. | ||
type MysqldLoadAvgSelfMetric struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also curious if this and the datadir type needs to be exported? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good question. Right now we're exporting all |
||
} | ||
|
||
func (m *MysqldLoadAvgSelfMetric) Name() MetricName { | ||
return MysqldLoadAvgMetricName | ||
} | ||
|
||
// DefaultScope returns SelfScope as the default scope for this metric. | ||
func (m *MysqldLoadAvgSelfMetric) DefaultScope() Scope { | ||
return SelfScope | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Having the type and variable exported makes this interface feel off to me. Since anyone can access the metric and this internal (?) variable as things are now. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yet no one holds an instance of var _ SelfMetric = registerSelfMetric(&MysqldLoadAvgSelfMetric{}) So yes, anyone can create their own new instance, but that cannot affect the existing instances. |
||
} | ||
|
||
func (m *MysqldLoadAvgSelfMetric) DefaultThreshold() float64 { | ||
return 1.0 | ||
} | ||
|
||
func (m *MysqldLoadAvgSelfMetric) RequiresConn() bool { | ||
return false | ||
} | ||
|
||
func (m *MysqldLoadAvgSelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric { | ||
return getMysqlHostMetric(ctx, params, "loadavg") | ||
} | ||
|
||
// MysqldDatadirUsedRatioSelfMetric stands for the disk space usage of the mount where MySQL's
// datadir is located, expressed as a ratio.
// Range: 0.0 (empty) - 1.0 (full)
// The type carries no state.
type MysqldDatadirUsedRatioSelfMetric struct{}
|
||
func (m *MysqldDatadirUsedRatioSelfMetric) Name() MetricName { | ||
return MysqldDatadirUsedRatioMetricName | ||
} | ||
|
||
func (m *MysqldDatadirUsedRatioSelfMetric) DefaultScope() Scope { | ||
return SelfScope | ||
} | ||
|
||
func (m *MysqldDatadirUsedRatioSelfMetric) DefaultThreshold() float64 { | ||
return 0.98 | ||
} | ||
|
||
func (m *MysqldDatadirUsedRatioSelfMetric) RequiresConn() bool { | ||
return false | ||
} | ||
|
||
func (m *MysqldDatadirUsedRatioSelfMetric) Read(ctx context.Context, params *SelfMetricReadParams) *ThrottleMetric { | ||
return getMysqlHostMetric(ctx, params, "datadir-used-ratio") | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How do these changes relate to the PR? Is it that we're getting rid of the old
loadavg
OS metric now that we have the new mysql-loadavg
metric and so we wanted to test another MySQL process metric here? Is there any reason that we're not using any of the new MySQL host metrics in the e2e tests? Or am I missing something here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
See #16904 (comment): it's about tests failing on MacOS machines.
You're not missing anything here. Let me see what we can do for e2e tests.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added
endtoend
tests that verify the full MysqlHostMetrics
path down to mysqld, and check the response values. We check the existence of both new mysqld-*
values, and we specifically check that mysqld-datadir-used-ratio
is non-zero (as can be expected in local hosts and in CI). We do not check the value of mysqld-loadavg
as that could be arbitrarily low and even appear to be zero, as well as because we do not implement it in MacOS.