From e45438b8e47d89e6371eda981f037c50b2715ad4 Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Tue, 26 Mar 2024 13:48:25 +0100 Subject: [PATCH 1/3] Setup individual timeouts for each binary Until now we've had the same timeouts for shutdown etc. across the different binaries. This doesn't provide the necessary flexibility though, since for example `mysqlctld` will need longer timeouts. This is because it needs to shut down MySQL and we need it to honor the MySQL shutdown timeout as well. This means this can take significantly longer to ensure the shutdown is clean. It's necessary to do this, to ensure things like being able to upgrade MySQL since that depends on clean shutdowns. Signed-off-by: Dirkjan Bussink --- go/cmd/mysqlctld/cli/mysqlctld.go | 8 ++++++- go/flags/endtoend/mysqlctld.txt | 2 +- go/vt/servenv/run.go | 8 +++---- go/vt/servenv/servenv.go | 37 ++++++++++++++++++++++++++----- 4 files changed, 43 insertions(+), 12 deletions(-) diff --git a/go/cmd/mysqlctld/cli/mysqlctld.go b/go/cmd/mysqlctld/cli/mysqlctld.go index 2dff7da0f7f..8dacf8d9d9f 100644 --- a/go/cmd/mysqlctld/cli/mysqlctld.go +++ b/go/cmd/mysqlctld/cli/mysqlctld.go @@ -71,12 +71,18 @@ var ( PreRunE: servenv.CobraPreRunE, RunE: run, } + + timeouts = &servenv.TimeoutFlags{ + LameduckPeriod: 50 * time.Millisecond, + OnTermTimeout: shutdownWaitTime + 10*time.Second, + OnCloseTimeout: 10 * time.Second, + } ) func init() { servenv.RegisterDefaultFlags() servenv.RegisterDefaultSocketFileFlags() - servenv.RegisterFlags() + servenv.RegisterFlagsWithTimeouts(timeouts) servenv.RegisterGRPCServerFlags() servenv.RegisterGRPCServerAuthFlags() servenv.RegisterServiceMapFlag() diff --git a/go/flags/endtoend/mysqlctld.txt b/go/flags/endtoend/mysqlctld.txt index 0dde59e0d7d..6bb1beb5bae 100644 --- a/go/flags/endtoend/mysqlctld.txt +++ b/go/flags/endtoend/mysqlctld.txt @@ -103,7 +103,7 @@ Flags: --mysqlctl_mycnf_template string template file to use for generating the my.cnf file during server init --mysqlctl_socket string socket file to use for remote mysqlctl actions (empty for local actions) --onclose_timeout duration wait no more than this for OnClose handlers before stopping (default 10s) - --onterm_timeout duration wait no more than this for OnTermSync handlers before stopping (default 10s) + --onterm_timeout duration wait no more than this for OnTermSync handlers before stopping (default 5m10s) --pid_file string If set, the process will write its pid to the named file, and delete it on graceful shutdown. --pool_hostname_resolve_interval duration if set force an update to all hostnames and reconnect if changed, defaults to 0 (disabled) --port int port for the server diff --git a/go/vt/servenv/run.go b/go/vt/servenv/run.go index 6f028786eaf..29b15a40008 100644 --- a/go/vt/servenv/run.go +++ b/go/vt/servenv/run.go @@ -62,18 +62,18 @@ func Run(bindAddress string, port int) { l.Close() startTime := time.Now() - log.Infof("Entering lameduck mode for at least %v", lameduckPeriod) + log.Infof("Entering lameduck mode for at least %v", timeouts.LameduckPeriod) log.Infof("Firing asynchronous OnTerm hooks") go onTermHooks.Fire() - fireOnTermSyncHooks(onTermTimeout) - if remain := lameduckPeriod - time.Since(startTime); remain > 0 { + fireOnTermSyncHooks(timeouts.OnTermTimeout) + if remain := timeouts.LameduckPeriod - time.Since(startTime); remain > 0 { log.Infof("Sleeping an extra %v after OnTermSync to finish lameduck period", remain) time.Sleep(remain) } log.Info("Shutting down gracefully") - fireOnCloseHooks(onCloseTimeout) + fireOnCloseHooks(timeouts.OnCloseTimeout) ListeningURL = url.URL{} } diff --git a/go/vt/servenv/servenv.go b/go/vt/servenv/servenv.go index 6a7898501f8..4aa9818eb7d 100644 --- a/go/vt/servenv/servenv.go +++ b/go/vt/servenv/servenv.go @@ -75,24 +75,33 @@ var ( // Flags specific to Init, Run, and RunDefault functions. var ( - lameduckPeriod = 50 * time.Millisecond - onTermTimeout = 10 * time.Second - onCloseTimeout = 10 * time.Second catchSigpipe bool maxStackSize = 64 * 1024 * 1024 initStartTime time.Time // time when tablet init started: for debug purposes to time how long a tablet init takes tableRefreshInterval int ) +type TimeoutFlags struct { + LameduckPeriod time.Duration + OnTermTimeout time.Duration + OnCloseTimeout time.Duration +} + +var timeouts = &TimeoutFlags{ + LameduckPeriod: 50 * time.Millisecond, + OnTermTimeout: 10 * time.Second, + OnCloseTimeout: 10 * time.Second, +} + // RegisterFlags installs the flags used by Init, Run, and RunDefault. // // This must be called before servenv.ParseFlags if using any of those // functions. func RegisterFlags() { OnParse(func(fs *pflag.FlagSet) { - fs.DurationVar(&lameduckPeriod, "lameduck-period", lameduckPeriod, "keep running at least this long after SIGTERM before stopping") - fs.DurationVar(&onTermTimeout, "onterm_timeout", onTermTimeout, "wait no more than this for OnTermSync handlers before stopping") - fs.DurationVar(&onCloseTimeout, "onclose_timeout", onCloseTimeout, "wait no more than this for OnClose handlers before stopping") + fs.DurationVar(&timeouts.LameduckPeriod, "lameduck-period", timeouts.LameduckPeriod, "keep running at least this long after SIGTERM before stopping") + fs.DurationVar(&timeouts.OnTermTimeout, "onterm_timeout", timeouts.OnTermTimeout, "wait no more than this for OnTermSync handlers before stopping") + fs.DurationVar(&timeouts.OnCloseTimeout, "onclose_timeout", timeouts.OnCloseTimeout, "wait no more than this for OnClose handlers before stopping") fs.BoolVar(&catchSigpipe, "catch-sigpipe", catchSigpipe, "catch and ignore SIGPIPE on stdout and stderr if specified") fs.IntVar(&maxStackSize, "max-stack-size", maxStackSize, "configure the maximum stack size in bytes") fs.IntVar(&tableRefreshInterval, "table-refresh-interval", tableRefreshInterval, "interval in milliseconds to refresh tables in status page with refreshRequired class") @@ -102,6 +111,22 @@ func RegisterFlags() { }) } +func RegisterFlagsWithTimeouts(tf *TimeoutFlags) { + OnParse(func(fs *pflag.FlagSet) { + fs.DurationVar(&tf.LameduckPeriod, "lameduck-period", tf.LameduckPeriod, "keep running at least this long after SIGTERM before stopping") + fs.DurationVar(&tf.OnTermTimeout, "onterm_timeout", tf.OnTermTimeout, "wait no more than this for OnTermSync handlers before stopping") + fs.DurationVar(&tf.OnCloseTimeout, "onclose_timeout", tf.OnCloseTimeout, "wait no more than this for OnClose handlers before stopping") + fs.BoolVar(&catchSigpipe, "catch-sigpipe", catchSigpipe, "catch and ignore SIGPIPE on stdout and stderr if specified") + fs.IntVar(&maxStackSize, "max-stack-size", maxStackSize, "configure the maximum stack size in bytes") + fs.IntVar(&tableRefreshInterval, "table-refresh-interval", tableRefreshInterval, "interval in milliseconds to refresh tables in status page with refreshRequired class") + + // pid_file.go + fs.StringVar(&pidFile, "pid_file", pidFile, "If set, the process will write its pid to the named file, and delete it on graceful shutdown.") + + timeouts = tf + }) +} + func GetInitStartTime() time.Time { mu.Lock() defer mu.Unlock() From f1a508fc25ea76e219e7563fdd36aa25dc159d4e Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Tue, 26 Mar 2024 20:51:10 +0100 Subject: [PATCH 2/3] Use shared constant for default timeout value Signed-off-by: Dirkjan Bussink --- go/cmd/vtbackup/cli/vtbackup.go | 2 +- go/cmd/vtcombo/cli/main.go | 14 ++++++-------- go/vt/mysqlctl/grpcmysqlctlserver/server.go | 5 +---- go/vt/mysqlctl/mycnf.go | 2 ++ go/vt/vttablet/tabletmanager/tm_init.go | 2 +- 5 files changed, 11 insertions(+), 14 deletions(-) diff --git a/go/cmd/vtbackup/cli/vtbackup.go b/go/cmd/vtbackup/cli/vtbackup.go index 1f6f62f7ad1..3afc21bda7f 100644 --- a/go/cmd/vtbackup/cli/vtbackup.go +++ b/go/cmd/vtbackup/cli/vtbackup.go @@ -87,7 +87,7 @@ var ( mysqlPort = 3306 mysqlSocket string mysqlTimeout = 5 * time.Minute - mysqlShutdownTimeout = 5 * time.Minute + mysqlShutdownTimeout = mysqlctl.DefaultShutdownTimeout initDBSQLFile string detachedMode bool keepAliveTimeout time.Duration diff --git a/go/cmd/vtcombo/cli/main.go b/go/cmd/vtcombo/cli/main.go index 16e625a6ae6..d18c22ddfbb 100644 --- a/go/cmd/vtcombo/cli/main.go +++ b/go/cmd/vtcombo/cli/main.go @@ -168,8 +168,6 @@ func startMysqld(uid uint32) (mysqld *mysqlctl.Mysqld, cnf *mysqlctl.Mycnf, err return mysqld, cnf, nil } -const mysqlShutdownTimeout = 5 * time.Minute - func run(cmd *cobra.Command, args []string) (err error) { // Stash away a copy of the topology that vtcombo was started with. // @@ -218,9 +216,9 @@ func run(cmd *cobra.Command, args []string) (err error) { return err } servenv.OnClose(func() { - ctx, cancel := context.WithTimeout(context.Background(), mysqlShutdownTimeout+10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), mysqlctl.DefaultShutdownTimeout+10*time.Second) defer cancel() - mysqld.Shutdown(ctx, cnf, true, mysqlShutdownTimeout) + mysqld.Shutdown(ctx, cnf, true, mysqlctl.DefaultShutdownTimeout) }) // We want to ensure we can write to this database mysqld.SetReadOnly(false) @@ -242,9 +240,9 @@ func run(cmd *cobra.Command, args []string) (err error) { if err != nil { // ensure we start mysql in the event we fail here if startMysql { - ctx, cancel := context.WithTimeout(context.Background(), mysqlShutdownTimeout+10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), mysqlctl.DefaultShutdownTimeout+10*time.Second) defer cancel() - mysqld.Shutdown(ctx, cnf, true, mysqlShutdownTimeout) + mysqld.Shutdown(ctx, cnf, true, mysqlctl.DefaultShutdownTimeout) } return fmt.Errorf("initTabletMapProto failed: %w", err) @@ -291,9 +289,9 @@ func run(cmd *cobra.Command, args []string) (err error) { err := topotools.RebuildKeyspace(context.Background(), logutil.NewConsoleLogger(), ts, ks.GetName(), tpb.Cells, false) if err != nil { if startMysql { - ctx, cancel := context.WithTimeout(context.Background(), mysqlShutdownTimeout+10*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), mysqlctl.DefaultShutdownTimeout+10*time.Second) defer cancel() - mysqld.Shutdown(ctx, cnf, true, mysqlShutdownTimeout) + mysqld.Shutdown(ctx, cnf, true, mysqlctl.DefaultShutdownTimeout) } return fmt.Errorf("Couldn't build srv keyspace for (%v: %v). Got error: %w", ks, tpb.Cells, err) diff --git a/go/vt/mysqlctl/grpcmysqlctlserver/server.go b/go/vt/mysqlctl/grpcmysqlctlserver/server.go index 38ce4b2b2df..2a703a50a84 100644 --- a/go/vt/mysqlctl/grpcmysqlctlserver/server.go +++ b/go/vt/mysqlctl/grpcmysqlctlserver/server.go @@ -22,7 +22,6 @@ package grpcmysqlctlserver import ( "context" - "time" "google.golang.org/grpc" @@ -43,8 +42,6 @@ func (s *server) Start(ctx context.Context, request *mysqlctlpb.StartRequest) (* return &mysqlctlpb.StartResponse{}, s.mysqld.Start(ctx, s.cnf, request.MysqldArgs...) } -const mysqlShutdownTimeout = 5 * time.Minute - // Shutdown implements the server side of the MysqlctlClient interface. func (s *server) Shutdown(ctx context.Context, request *mysqlctlpb.ShutdownRequest) (*mysqlctlpb.ShutdownResponse, error) { timeout, ok, err := protoutil.DurationFromProto(request.MysqlShutdownTimeout) @@ -52,7 +49,7 @@ func (s *server) Shutdown(ctx context.Context, request *mysqlctlpb.ShutdownReque return nil, err } if !ok { - timeout = mysqlShutdownTimeout + timeout = mysqlctl.DefaultShutdownTimeout } return &mysqlctlpb.ShutdownResponse{}, s.mysqld.Shutdown(ctx, s.cnf, request.WaitForMysqld, timeout) } diff --git a/go/vt/mysqlctl/mycnf.go b/go/vt/mysqlctl/mycnf.go index 7ae2d5d0aa9..c4ee062348b 100644 --- a/go/vt/mysqlctl/mycnf.go +++ b/go/vt/mysqlctl/mycnf.go @@ -31,6 +31,8 @@ import ( "time" ) +const DefaultShutdownTimeout = 5 * time.Minute + // Mycnf is a memory structure that contains a bunch of interesting // parameters to start mysqld. It can be used to generate standard // my.cnf files from a server id and mysql port. It can also be diff --git a/go/vt/vttablet/tabletmanager/tm_init.go b/go/vt/vttablet/tabletmanager/tm_init.go index 6a1a4f1b730..efb6c5e878f 100644 --- a/go/vt/vttablet/tabletmanager/tm_init.go +++ b/go/vt/vttablet/tabletmanager/tm_init.go @@ -92,7 +92,7 @@ var ( initTags flagutil.StringMapValue initTimeout = 1 * time.Minute - mysqlShutdownTimeout = 5 * time.Minute + mysqlShutdownTimeout = mysqlctl.DefaultShutdownTimeout ) func registerInitFlags(fs *pflag.FlagSet) { From c204f9aabff448b2ef0049f102458022ac00eff7 Mon Sep 17 00:00:00 2001 From: Dirkjan Bussink Date: Tue, 26 Mar 2024 21:02:09 +0100 Subject: [PATCH 3/3] Add release notes item Signed-off-by: Dirkjan Bussink --- changelog/20.0/20.0.0/summary.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/changelog/20.0/20.0.0/summary.md b/changelog/20.0/20.0.0/summary.md index 15455ef502c..49642dc8734 100644 --- a/changelog/20.0/20.0.0/summary.md +++ b/changelog/20.0/20.0.0/summary.md @@ -6,6 +6,7 @@ - **[Breaking changes](#breaking-changes)** - [`shutdown_grace_period` Default Change](#shutdown-grace-period-default) - [New `unmanaged` Flag and `disable_active_reparents` deprecation](#unmanaged-flag) + - [`mysqlctld` `onterm-timeout` Default Change](#mysqlctld-onterm-timeout) - **[Query Compatibility](#query-compatibility)** - [Vindex Hints](#vindex-hints) - [Update with Limit Support](#update-limit) @@ -38,6 +39,12 @@ New flag `--unmanaged` has been introduced in this release to make it easier to Starting this release, all unmanaged tablets should specify this flag. +#### `mysqlctld` `onterm_timeout` Default Change + +The `--onterm_timeout` flag default value has changed for `mysqlctld`. It now is by default long enough to be able to wait for the default `--shutdown-wait-time` when shutting down on a `TERM` signal. + +This is necessary since otherwise MySQL would never shut down cleanly with the old defaults, since `mysqlctld` would shut down already after 10 seconds by default. + ### Query Compatibility #### Vindex Hints