Skip to content

Commit

Permalink
feat: address review comments and update release notes
Browse files Browse the repository at this point in the history
Signed-off-by: Manan Gupta <[email protected]>
  • Loading branch information
GuptaManan100 committed Sep 28, 2023
1 parent 152d1bc commit df77a39
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 47 deletions.
8 changes: 2 additions & 6 deletions changelog/18.0/18.0.0/summary.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,12 +196,8 @@ vtbackup_restore_count{component="BackupStorage",implementation="S3",operation="
#### <a id="vtctld-and-vtorc-reparenting-stats"/>VTCtld and VTOrc reparenting stats

New VTCtld and VTorc stats were added to measure frequency of reparents by keyspace/shard:
- `emergency_reparent_shards` - Number of times Emergency Reparent Shard has been run
- `emergency_reparent_shards_failed` - Number of times Emergency Reparent Shard has failed
- `emergency_reparent_shards_succeeded` - Number of times Emergency Reparent Shard has succeeded
- `planned_reparent_shards` - Number of times Planned Reparent Shard has been run
- `planned_reparent_shards_failed` - Number of times Planned Reparent Shard has failed
- `planned_reparent_shards_succeeded` - Number of times Planned Reparent Shard has succeeded
- `emergency_reparent_counts` - Number of times Emergency Reparent Shard has been run. It is further subdivided by the keyspace, shard and the result of the operation.
- `planned_reparent_counts` - Number of times Planned Reparent Shard has been run. It is further subdivided by the keyspace, shard and the result of the operation.

Also, the `reparent_shard_operation_timings` stat was added to provide per-operation timings of reparent operations.

Expand Down
17 changes: 5 additions & 12 deletions go/vt/vtctl/reparentutil/emergency_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,8 @@ var (
legacyERSSuccessCounter = stats.NewGauge("ers_success_counter", "Number of times Emergency Reparent Shard has succeeded")
legacyERSFailureCounter = stats.NewGauge("ers_failure_counter", "Number of times Emergency Reparent Shard has failed")

ersCounter = stats.NewCountersWithMultiLabels("emergency_reparent_shards", "Number of times Emergency Reparent Shard has been run",
[]string{"Keyspace", "Shard"},
)
ersFailureCounter = stats.NewCountersWithMultiLabels("emergency_reparent_shards_failed", "Number of times Emergency Reparent Shard has failed",
[]string{"Keyspace", "Shard"},
)
ersSuccessCounter = stats.NewCountersWithMultiLabels("emergency_reparent_shards_succeeded", "Number of times Emergency Reparent Shard has succeeded",
[]string{"Keyspace", "Shard"},
ersCounter = stats.NewCountersWithMultiLabels("emergency_reparent_counts", "Number of times Emergency Reparent Shard has been run",
[]string{"Keyspace", "Shard", "Result"},
)
)

Expand Down Expand Up @@ -111,15 +105,14 @@ func NewEmergencyReparenter(ts *topo.Server, tmc tmclient.TabletManagerClient, l
func (erp *EmergencyReparenter) ReparentShard(ctx context.Context, keyspace string, shard string, opts EmergencyReparentOptions) (*events.Reparent, error) {
var err error
statsLabels := []string{keyspace, shard}
ersCounter.Add(statsLabels, 1)

opts.lockAction = erp.getLockAction(opts.NewPrimaryAlias)
// First step is to lock the shard for the given operation, if not already locked
if err = topo.CheckShardLocked(ctx, keyspace, shard); err != nil {
var unlock func(*error)
ctx, unlock, err = erp.ts.LockShard(ctx, keyspace, shard, opts.lockAction)
if err != nil {
ersFailureCounter.Add(statsLabels, 1)
ersCounter.Add(append(statsLabels, failureResult), 1)
return nil, err
}
defer unlock(&err)
Expand All @@ -133,11 +126,11 @@ func (erp *EmergencyReparenter) ReparentShard(ctx context.Context, keyspace stri
switch err {
case nil:
legacyERSSuccessCounter.Add(1)
ersSuccessCounter.Add(statsLabels, 1)
ersCounter.Add(append(statsLabels, successResult), 1)
event.DispatchUpdate(ev, "finished EmergencyReparentShard")
default:
legacyERSFailureCounter.Add(1)
ersFailureCounter.Add(statsLabels, 1)
ersCounter.Add(append(statsLabels, failureResult), 1)
event.DispatchUpdate(ev, "failed EmergencyReparentShard: "+err.Error())
}
}()
Expand Down
10 changes: 2 additions & 8 deletions go/vt/vtctl/reparentutil/emergency_reparenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2735,8 +2735,6 @@ func TestEmergencyReparenter_waitForAllRelayLogsToApply(t *testing.T) {

func TestEmergencyReparenterStats(t *testing.T) {
ersCounter.ResetAll()
ersSuccessCounter.ResetAll()
ersFailureCounter.ResetAll()
legacyERSCounter.Reset()
legacyERSSuccessCounter.Reset()
legacyERSFailureCounter.Reset()
Expand Down Expand Up @@ -2869,9 +2867,7 @@ func TestEmergencyReparenterStats(t *testing.T) {
require.NoError(t, err)

// check the counter values
require.EqualValues(t, map[string]int64{"testkeyspace.-": 1}, ersCounter.Counts())
require.EqualValues(t, map[string]int64{"testkeyspace.-": 1}, ersSuccessCounter.Counts())
require.EqualValues(t, map[string]int64{}, ersFailureCounter.Counts())
require.EqualValues(t, map[string]int64{"testkeyspace.-.success": 1}, ersCounter.Counts())
require.EqualValues(t, map[string]int64{"All": 1, "EmergencyReparentShard": 1}, reparentShardOpTimings.Counts())

// check the legacy counter values
Expand All @@ -2890,9 +2886,7 @@ func TestEmergencyReparenterStats(t *testing.T) {
require.Error(t, err)

// check the counter values
require.EqualValues(t, map[string]int64{"testkeyspace.-": 2}, ersCounter.Counts())
require.EqualValues(t, map[string]int64{"testkeyspace.-": 1}, ersSuccessCounter.Counts())
require.EqualValues(t, map[string]int64{"testkeyspace.-": 1}, ersFailureCounter.Counts())
require.EqualValues(t, map[string]int64{"testkeyspace.-.success": 1, "testkeyspace.-.failure": 1}, ersCounter.Counts())
require.EqualValues(t, map[string]int64{"All": 2, "EmergencyReparentShard": 2}, reparentShardOpTimings.Counts())

// check the legacy counter values
Expand Down
21 changes: 8 additions & 13 deletions go/vt/vtctl/reparentutil/planned_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,15 +41,11 @@ import (

// counters for Planned Reparent Shard
var (
prsCounter = stats.NewCountersWithMultiLabels("planned_reparent_shards", "Number of times Planned Reparent Shard has been run",
[]string{"Keyspace", "Shard"},
)
prsFailureCounter = stats.NewCountersWithMultiLabels("planned_reparent_shards_failed", "Number of times Planned Reparent Shard has failed",
[]string{"Keyspace", "Shard"},
)
prsSuccessCounter = stats.NewCountersWithMultiLabels("planned_reparent_shards_succeeded", "Number of times Planned Reparent Shard has succeeded",
[]string{"Keyspace", "Shard"},
prsCounter = stats.NewCountersWithMultiLabels("planned_reparent_counts", "Number of times Planned Reparent Shard has been run",
[]string{"Keyspace", "Shard", "Result"},
)
failureResult = "failure"
successResult = "success"
)

// PlannedReparenter performs PlannedReparentShard operations.
Expand Down Expand Up @@ -102,14 +98,13 @@ func NewPlannedReparenter(ts *topo.Server, tmc tmclient.TabletManagerClient, log
func (pr *PlannedReparenter) ReparentShard(ctx context.Context, keyspace string, shard string, opts PlannedReparentOptions) (*events.Reparent, error) {
var err error
statsLabels := []string{keyspace, shard}
prsCounter.Add(statsLabels, 1)

if err = topo.CheckShardLocked(ctx, keyspace, shard); err != nil {
var unlock func(*error)
opts.lockAction = pr.getLockAction(opts)
ctx, unlock, err = pr.ts.LockShard(ctx, keyspace, shard, opts.lockAction)
if err != nil {
prsFailureCounter.Add(statsLabels, 1)
prsCounter.Add(append(statsLabels, failureResult), 1)
return nil, err
}
defer unlock(&err)
Expand All @@ -118,7 +113,7 @@ func (pr *PlannedReparenter) ReparentShard(ctx context.Context, keyspace string,
if opts.NewPrimaryAlias == nil && opts.AvoidPrimaryAlias == nil {
shardInfo, err := pr.ts.GetShard(ctx, keyspace, shard)
if err != nil {
prsFailureCounter.Add(statsLabels, 1)
prsCounter.Add(append(statsLabels, failureResult), 1)
return nil, err
}

Expand All @@ -131,10 +126,10 @@ func (pr *PlannedReparenter) ReparentShard(ctx context.Context, keyspace string,
reparentShardOpTimings.Add("PlannedReparentShard", time.Since(startTime))
switch err {
case nil:
prsSuccessCounter.Add(statsLabels, 1)
prsCounter.Add(append(statsLabels, successResult), 1)
event.DispatchUpdate(ev, "finished PlannedReparentShard")
default:
prsFailureCounter.Add(statsLabels, 1)
prsCounter.Add(append(statsLabels, failureResult), 1)
event.DispatchUpdate(ev, "failed PlannedReparentShard: "+err.Error())
}
}()
Expand Down
10 changes: 2 additions & 8 deletions go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3958,8 +3958,6 @@ func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) {

func TestPlannedReparenterStats(t *testing.T) {
prsCounter.ResetAll()
prsSuccessCounter.ResetAll()
prsFailureCounter.ResetAll()
reparentShardOpTimings.Reset()

tmc := &testutil.TabletManagerClient{
Expand Down Expand Up @@ -4045,9 +4043,7 @@ func TestPlannedReparenterStats(t *testing.T) {
require.NoError(t, err)

// check the counter values
require.EqualValues(t, map[string]int64{"testkeyspace.-": 1}, prsCounter.Counts())
require.EqualValues(t, map[string]int64{"testkeyspace.-": 1}, prsSuccessCounter.Counts())
require.EqualValues(t, map[string]int64{}, prsFailureCounter.Counts())
require.EqualValues(t, map[string]int64{"testkeyspace.-.success": 1}, prsCounter.Counts())
require.EqualValues(t, map[string]int64{"All": 1, "PlannedReparentShard": 1}, reparentShardOpTimings.Counts())

// set plannedReparentOps to request a non existent tablet
Expand All @@ -4061,8 +4057,6 @@ func TestPlannedReparenterStats(t *testing.T) {
require.Error(t, err)

// check the counter values
require.EqualValues(t, map[string]int64{"testkeyspace.-": 2}, prsCounter.Counts())
require.EqualValues(t, map[string]int64{"testkeyspace.-": 1}, prsSuccessCounter.Counts())
require.EqualValues(t, map[string]int64{"testkeyspace.-": 1}, prsFailureCounter.Counts())
require.EqualValues(t, map[string]int64{"testkeyspace.-.success": 1, "testkeyspace.-.failure": 1}, prsCounter.Counts())
require.EqualValues(t, map[string]int64{"All": 2, "PlannedReparentShard": 2}, reparentShardOpTimings.Counts())
}

0 comments on commit df77a39

Please sign in to comment.