From 77cb0064cf786379032880f12a50793ba79f6411 Mon Sep 17 00:00:00 2001 From: Manan Gupta <35839558+GuptaManan100@users.noreply.github.com> Date: Tue, 5 Dec 2023 16:01:47 +0530 Subject: [PATCH] Postpone waiting for dba grants after restore has succeeded (#14680) Signed-off-by: Manan Gupta --- go/cmd/vttablet/cli/cli.go | 10 +----- go/vt/vtcombo/tablet_map.go | 2 +- go/vt/vttablet/tabletmanager/tm_init.go | 36 ++++++++++++++----- go/vt/vttablet/tabletmanager/tm_init_test.go | 22 ++++++------ go/vt/vttablet/tabletserver/tabletserver.go | 2 +- .../tabletserver/tabletserver_test.go | 7 ++++ go/vt/wrangler/fake_tablet_test.go | 2 +- go/vt/wrangler/testlib/fake_tablet.go | 2 +- 8 files changed, 50 insertions(+), 33 deletions(-) diff --git a/go/cmd/vttablet/cli/cli.go b/go/cmd/vttablet/cli/cli.go index bed53d284e8..d68856be9b6 100644 --- a/go/cmd/vttablet/cli/cli.go +++ b/go/cmd/vttablet/cli/cli.go @@ -102,10 +102,6 @@ vttablet \ } ) -const ( - dbaGrantWaitTime = 10 * time.Second -) - func run(cmd *cobra.Command, args []string) error { servenv.Init() @@ -155,7 +151,7 @@ func run(cmd *cobra.Command, args []string) error { VREngine: vreplication.NewEngine(config, ts, tabletAlias.Cell, mysqld, qsc.LagThrottler()), VDiffEngine: vdiff.NewEngine(config, ts, tablet), } - if err := tm.Start(tablet, config.Healthcheck.IntervalSeconds.Get()); err != nil { + if err := tm.Start(tablet, config); err != nil { ts.Close() return fmt.Errorf("failed to parse --tablet-path or initialize DB credentials: %w", err) } @@ -249,10 +245,6 @@ func createTabletServer(ctx context.Context, config *tabletenv.TabletConfig, ts return nil, fmt.Errorf("table acl config has to be specified with table-acl-config flag because enforce-tableacl-config is set.") } - err := tabletserver.WaitForDBAGrants(config, dbaGrantWaitTime) - if err != nil { - return nil, err - } // creates and registers the query service qsc := tabletserver.NewTabletServer(ctx, "", config, ts, tabletAlias) servenv.OnRun(func() { diff --git a/go/vt/vtcombo/tablet_map.go b/go/vt/vtcombo/tablet_map.go index 77b7f267a30..b9fc4af294b 100644 --- a/go/vt/vtcombo/tablet_map.go +++ b/go/vt/vtcombo/tablet_map.go @@ -117,7 +117,7 @@ func CreateTablet( Type: initTabletType, DbNameOverride: dbname, } - if err := tm.Start(tablet, 0); err != nil { + if err := tm.Start(tablet, nil); err != nil { return err } diff --git a/go/vt/vttablet/tabletmanager/tm_init.go b/go/vt/vttablet/tabletmanager/tm_init.go index c1b6c837d50..d65115990f1 100644 --- a/go/vt/vttablet/tabletmanager/tm_init.go +++ b/go/vt/vttablet/tabletmanager/tm_init.go @@ -71,10 +71,14 @@ import ( "vitess.io/vitess/go/vt/vttablet/tabletmanager/vdiff" "vitess.io/vitess/go/vt/vttablet/tabletmanager/vreplication" "vitess.io/vitess/go/vt/vttablet/tabletserver" + "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" ) -// Query rules from denylist -const denyListQueryList string = "DenyListQueryRules" +const ( + // Query rules from denylist + denyListQueryList string = "DenyListQueryRules" + dbaGrantWaitTime = 10 * time.Second +) var ( // The following flags initialize the tablet record. @@ -335,7 +339,7 @@ func mergeTags(a, b map[string]string) map[string]string { } // Start starts the TabletManager. -func (tm *TabletManager) Start(tablet *topodatapb.Tablet, healthCheckInterval time.Duration) error { +func (tm *TabletManager) Start(tablet *topodatapb.Tablet, config *tabletenv.TabletConfig) error { defer func() { log.Infof("TabletManager Start took ~%d ms", time.Since(servenv.GetInitStartTime()).Milliseconds()) }() @@ -395,7 +399,7 @@ func (tm *TabletManager) Start(tablet *topodatapb.Tablet, healthCheckInterval ti tm.exportStats() servenv.OnRun(tm.registerTabletManager) - restoring, err := tm.handleRestore(tm.BatchCtx) + restoring, err := tm.handleRestore(tm.BatchCtx, config) if err != nil { return err } @@ -408,8 +412,17 @@ func (tm *TabletManager) Start(tablet *topodatapb.Tablet, healthCheckInterval ti // We shouldn't use the base tablet type directly, since the type could have changed to PRIMARY // earlier in tm.checkPrimaryShip code. _, err = tm.initializeReplication(ctx, tm.Tablet().Type) + if err != nil { + return err + } + + // Make sure we have the correct privileges for the DBA user before we start the state manager. + err = tabletserver.WaitForDBAGrants(config, dbaGrantWaitTime) + if err != nil { + return err + } tm.tmState.Open() - return err + return nil } // Close prepares a tablet for shutdown. First we check our tablet ownership and @@ -764,7 +777,7 @@ func (tm *TabletManager) initTablet(ctx context.Context) error { return nil } -func (tm *TabletManager) handleRestore(ctx context.Context) (bool, error) { +func (tm *TabletManager) handleRestore(ctx context.Context, config *tabletenv.TabletConfig) (bool, error) { // Sanity check for inconsistent flags if tm.Cnf == nil && restoreFromBackup { return false, fmt.Errorf("you cannot enable --restore_from_backup without a my.cnf file") @@ -776,9 +789,6 @@ func (tm *TabletManager) handleRestore(ctx context.Context) (bool, error) { // Restore in the background if restoreFromBackup { go func() { - // Open the state manager after restore is done. - defer tm.tmState.Open() - // Zero date will cause us to use the latest, which is the default backupTime := time.Time{} // Or if a backup timestamp was specified then we use the last backup taken at or before that time @@ -803,6 +813,14 @@ func (tm *TabletManager) handleRestore(ctx context.Context) (bool, error) { if err := tm.RestoreData(ctx, logutil.NewConsoleLogger(), waitForBackupInterval, false /* deleteBeforeRestore */, backupTime, restoreToTimestamp, restoreToPos, mysqlShutdownTimeout); err != nil { log.Exitf("RestoreFromBackup failed: %v", err) } + + // Make sure we have the correct privileges for the DBA user before we start the state manager. + err := tabletserver.WaitForDBAGrants(config, dbaGrantWaitTime) + if err != nil { + log.Exitf("Failed waiting for DBA grants: %v", err) + } + // Open the state manager after restore is done. + tm.tmState.Open() }() return true, nil } diff --git a/go/vt/vttablet/tabletmanager/tm_init_test.go b/go/vt/vttablet/tabletmanager/tm_init_test.go index 148042bd6b1..16dddba7dfd 100644 --- a/go/vt/vttablet/tabletmanager/tm_init_test.go +++ b/go/vt/vttablet/tabletmanager/tm_init_test.go @@ -282,7 +282,7 @@ func TestCheckPrimaryShip(t *testing.T) { return nil }) require.NoError(t, err) - err = tm.Start(tablet, 0) + err = tm.Start(tablet, nil) require.NoError(t, err) ti, err = ts.GetTablet(ctx, alias) require.NoError(t, err) @@ -297,7 +297,7 @@ func TestCheckPrimaryShip(t *testing.T) { // correct and start as PRIMARY. err = ts.DeleteTablet(ctx, alias) require.NoError(t, err) - err = tm.Start(tablet, 0) + err = tm.Start(tablet, nil) require.NoError(t, err) ti, err = ts.GetTablet(ctx, alias) require.NoError(t, err) @@ -311,7 +311,7 @@ func TestCheckPrimaryShip(t *testing.T) { ti.Type = topodatapb.TabletType_PRIMARY err = ts.UpdateTablet(ctx, ti) require.NoError(t, err) - err = tm.Start(tablet, 0) + err = tm.Start(tablet, nil) require.NoError(t, err) ti, err = ts.GetTablet(ctx, alias) require.NoError(t, err) @@ -321,7 +321,7 @@ func TestCheckPrimaryShip(t *testing.T) { tm.Stop() // 5. Subsequent inits will still start the vttablet as PRIMARY. - err = tm.Start(tablet, 0) + err = tm.Start(tablet, nil) require.NoError(t, err) ti, err = ts.GetTablet(ctx, alias) require.NoError(t, err) @@ -353,7 +353,7 @@ func TestCheckPrimaryShip(t *testing.T) { return nil }) require.NoError(t, err) - err = tm.Start(tablet, 0) + err = tm.Start(tablet, nil) require.NoError(t, err) ti, err = ts.GetTablet(ctx, alias) require.NoError(t, err) @@ -380,7 +380,7 @@ func TestCheckPrimaryShip(t *testing.T) { "FAKE SET MASTER", "START SLAVE", } - err = tm.Start(tablet, 0) + err = tm.Start(tablet, nil) require.NoError(t, err) ti, err = ts.GetTablet(ctx, alias) require.NoError(t, err) @@ -407,7 +407,7 @@ func TestStartCheckMysql(t *testing.T) { DBConfigs: dbconfigs.NewTestDBConfigs(cp, cp, ""), QueryServiceControl: tabletservermock.NewController(), } - err := tm.Start(tablet, 0) + err := tm.Start(tablet, nil) require.NoError(t, err) defer tm.Stop() @@ -435,7 +435,7 @@ func TestStartFindMysqlPort(t *testing.T) { DBConfigs: &dbconfigs.DBConfigs{}, QueryServiceControl: tabletservermock.NewController(), } - err := tm.Start(tablet, 0) + err := tm.Start(tablet, nil) require.NoError(t, err) defer tm.Stop() @@ -511,7 +511,7 @@ func TestStartDoesNotUpdateReplicationDataForTabletInWrongShard(t *testing.T) { tablet := newTestTablet(t, 1, "ks", "-d0") require.NoError(t, err) - err = tm.Start(tablet, 0) + err = tm.Start(tablet, nil) assert.Contains(t, err.Error(), "existing tablet keyspace and shard ks/0 differ") tablets, err := ts.FindAllTabletAliasesInShard(ctx, "ks", "-d0") @@ -548,7 +548,7 @@ func TestCheckTabletTypeResets(t *testing.T) { return nil }) require.NoError(t, err) - err = tm.Start(tablet, 0) + err = tm.Start(tablet, nil) require.NoError(t, err) assert.Equal(t, tm.tmState.tablet.Type, tm.tmState.displayState.tablet.Type) ti, err = ts.GetTablet(ctx, alias) @@ -671,7 +671,7 @@ func newTestTM(t *testing.T, ts *topo.Server, uid int, keyspace, shard string) * DBConfigs: &dbconfigs.DBConfigs{}, QueryServiceControl: tabletservermock.NewController(), } - err := tm.Start(tablet, 0) + err := tm.Start(tablet, nil) require.NoError(t, err) // Wait for SrvKeyspace to be rebuilt. We know that it has been built diff --git a/go/vt/vttablet/tabletserver/tabletserver.go b/go/vt/vttablet/tabletserver/tabletserver.go index 12c6a40868f..5f9310add84 100644 --- a/go/vt/vttablet/tabletserver/tabletserver.go +++ b/go/vt/vttablet/tabletserver/tabletserver.go @@ -236,7 +236,7 @@ func NewTabletServer(ctx context.Context, name string, config *tabletenv.TabletC func WaitForDBAGrants(config *tabletenv.TabletConfig, waitTime time.Duration) error { // We don't wait for grants if the tablet is externally managed. Permissions // are then the responsibility of the DBA. - if config.DB.HasGlobalSettings() || waitTime == 0 { + if config == nil || config.DB.HasGlobalSettings() || waitTime == 0 { return nil } timer := time.NewTimer(waitTime) diff --git a/go/vt/vttablet/tabletserver/tabletserver_test.go b/go/vt/vttablet/tabletserver/tabletserver_test.go index 0f85e1018f5..a72a1aa76db 100644 --- a/go/vt/vttablet/tabletserver/tabletserver_test.go +++ b/go/vt/vttablet/tabletserver/tabletserver_test.go @@ -2739,6 +2739,13 @@ func TestWaitForDBAGrants(t *testing.T) { } return tc, func() {} }, + }, { + name: "Empty config", + waitTime: 300 * time.Millisecond, + errWanted: "", + setupFunc: func(t *testing.T) (*tabletenv.TabletConfig, func()) { + return nil, func() {} + }, }, } for _, tt := range tests { diff --git a/go/vt/wrangler/fake_tablet_test.go b/go/vt/wrangler/fake_tablet_test.go index 66d5cf474d6..cae4e8ffc41 100644 --- a/go/vt/wrangler/fake_tablet_test.go +++ b/go/vt/wrangler/fake_tablet_test.go @@ -201,7 +201,7 @@ func (ft *fakeTablet) StartActionLoop(t *testing.T, wr *Wrangler) { QueryServiceControl: tabletservermock.NewController(), VDiffEngine: vdiff2.NewEngine(config, wr.TopoServer(), ft.Tablet), } - if err := ft.TM.Start(ft.Tablet, 0); err != nil { + if err := ft.TM.Start(ft.Tablet, nil); err != nil { t.Fatal(err) } ft.Tablet = ft.TM.Tablet() diff --git a/go/vt/wrangler/testlib/fake_tablet.go b/go/vt/wrangler/testlib/fake_tablet.go index a1b30813f53..9c511185769 100644 --- a/go/vt/wrangler/testlib/fake_tablet.go +++ b/go/vt/wrangler/testlib/fake_tablet.go @@ -210,7 +210,7 @@ func (ft *FakeTablet) StartActionLoop(t *testing.T, wr *wrangler.Wrangler) { QueryServiceControl: tabletservermock.NewController(), VREngine: vreplication.NewTestEngine(wr.TopoServer(), ft.Tablet.Alias.Cell, ft.FakeMysqlDaemon, binlogplayer.NewFakeDBClient, binlogplayer.NewFakeDBClient, topoproto.TabletDbName(ft.Tablet), nil), } - if err := ft.TM.Start(ft.Tablet, 0); err != nil { + if err := ft.TM.Start(ft.Tablet, nil); err != nil { t.Fatalf("Error in tablet - %v, err - %v", topoproto.TabletAliasString(ft.Tablet.Alias), err.Error()) } ft.Tablet = ft.TM.Tablet()