Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VReplication: improve reliability of log management #15374

Merged
merged 19 commits into from
Mar 10, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions go/vt/binlog/binlogplayer/mock_dbclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,3 +244,10 @@ func (dc *MockDBClient) ExecuteFetchMulti(query string, maxrows int) ([]*sqltype
}
return results, nil
}

// RemoveInvariant can be used to customize the behavior of the mock client.
func (dc *MockDBClient) RemoveInvariant(query string) {
dc.expectMu.Lock()
defer dc.expectMu.Unlock()
delete(dc.invariants, query)
}
3 changes: 2 additions & 1 deletion go/vt/vtctl/workflow/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,8 @@

if stream.Id > streamLog.StreamId {
log.Warningf("Found stream log for nonexistent stream: %+v", streamLog)
break
// This can happen on manual/failed workflow cleanup so keep going.
continue

Check warning on line 927 in go/vt/vtctl/workflow/server.go

View check run for this annotation

Codecov / codecov/patch

go/vt/vtctl/workflow/server.go#L927

Added line #L927 was not covered by tests
}

// stream.Id == streamLog.StreamId
Expand Down
12 changes: 3 additions & 9 deletions go/vt/vttablet/tabletmanager/vreplication/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -433,9 +433,7 @@ func (vre *Engine) exec(query string, runAsAdmin bool) (*sqltypes.Result, error)
return nil, err
}
vre.controllers[id] = ct
if err := insertLogWithParams(vdbc, LogStreamCreate, id, params); err != nil {
return nil, err
}
insertLogWithParams(vdbc, LogStreamCreate, id, params)
}
return qr, nil
case updateQuery:
Expand Down Expand Up @@ -475,9 +473,7 @@ func (vre *Engine) exec(query string, runAsAdmin bool) (*sqltypes.Result, error)
return nil, err
}
vre.controllers[id] = ct
if err := insertLog(vdbc, LogStateChange, id, params["state"], ""); err != nil {
return nil, err
}
insertLog(vdbc, LogStateChange, id, params["state"], "")
}
return qr, nil
case deleteQuery:
Expand All @@ -495,9 +491,7 @@ func (vre *Engine) exec(query string, runAsAdmin bool) (*sqltypes.Result, error)
ct.Stop()
delete(vre.controllers, id)
}
if err := insertLogWithParams(vdbc, LogStreamDelete, id, nil); err != nil {
return nil, err
}
insertLogWithParams(vdbc, LogStreamDelete, id, nil)
}
if err := dbClient.Begin(); err != nil {
return nil, err
Expand Down
40 changes: 28 additions & 12 deletions go/vt/vttablet/tabletmanager/vreplication/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,17 @@

const (
vreplicationLogTableName = "vreplication_log"
// This comes from the fact that the message column in the vreplication_log table is of type TEXT.
mattlord marked this conversation as resolved.
Show resolved Hide resolved
// See: go/vt/sidecardb/schema/vreplication/vreplication_log.sql
// https://dev.mysql.com/doc/refman/en/string-type-syntax.html and
// https://dev.mysql.com/doc/refman/en/storage-requirements.html#data-types-storage-reqs-strings
maxVReplicationLogMessageLen = 65535
)

// vrepliationLogTruncationStr is the string that is used to indicate that a message has been
// truncated, in the middle, before being inserted into the vreplication_log table.
var vrepliationLogTruncationStr = fmt.Sprintf(" ... %s ... ", sqlparser.TruncationText)
mattlord marked this conversation as resolved.
Show resolved Hide resolved

const (
// Enum values for type column in the vreplication_log table.

Expand Down Expand Up @@ -82,46 +91,53 @@
return id, typ, state, message, nil
}

func insertLog(dbClient *vdbClient, typ string, vreplID int32, state, message string) error {
func insertLog(dbClient *vdbClient, typ string, vreplID int32, state, message string) {
// getLastLog returns the last log for a stream. During insertion, if the type/state/message match we do not insert
// a new log but increment the count. This prevents spamming of the log table in case the same message is logged continuously.
id, _, lastLogState, lastLogMessage, err := getLastLog(dbClient, vreplID)
if err != nil {
return err
log.Errorf("Could not insert vreplication_log record because we failed to get the last log record: %v", err)
return

Check warning on line 100 in go/vt/vttablet/tabletmanager/vreplication/utils.go

View check run for this annotation

Codecov / codecov/patch

go/vt/vttablet/tabletmanager/vreplication/utils.go#L99-L100

Added lines #L99 - L100 were not covered by tests
}
if typ == LogStateChange && state == lastLogState {
// handles case where current state is Running, controller restarts after an error and initializes the state Running
return nil
return
}
var query string
if id > 0 && message == lastLogMessage {
query = fmt.Sprintf("update %s.vreplication_log set count = count + 1 where id = %d", sidecar.GetIdentifier(), id)
} else {
buf := sqlparser.NewTrackedBuffer(nil)
// We perform the truncation, if needed, in the middle of the message as the end of the message is likely to
mattlord marked this conversation as resolved.
Show resolved Hide resolved
// be the most important part as it often explains WHY we e.g. failed to execute an INSERT in the workflow.
if len(message) > maxVReplicationLogMessageLen {
mid := (len(message) / 2) - len(vrepliationLogTruncationStr)
for mid > (maxVReplicationLogMessageLen / 2) {
mid = mid / 2
}
tail := (len(message) - (mid + len(vrepliationLogTruncationStr))) + 1
message = fmt.Sprintf("%s%s%s", message[:mid], vrepliationLogTruncationStr, message[tail:])
}
buf.Myprintf("insert into %s.vreplication_log(vrepl_id, type, state, message) values(%s, %s, %s, %s)",
sidecar.GetIdentifier(), strconv.Itoa(int(vreplID)), encodeString(typ), encodeString(state), encodeString(message))
query = buf.ParsedQuery().Query
}
if _, err = dbClient.ExecuteFetch(query, 10000); err != nil {
return fmt.Errorf("could not insert into log table: %v: %v", query, err)
log.Errorf("Could not insert into vreplication_log table: %v: %v", query, err)

Check warning on line 126 in go/vt/vttablet/tabletmanager/vreplication/utils.go

View check run for this annotation

Codecov / codecov/patch

go/vt/vttablet/tabletmanager/vreplication/utils.go#L126

Added line #L126 was not covered by tests
}
return nil
}

// insertLogWithParams is called when a stream is created. The attributes of the stream are stored as a json string
func insertLogWithParams(dbClient *vdbClient, action string, vreplID int32, params map[string]string) error {
// insertLogWithParams is called when a stream is created. The attributes of the stream are stored as a json string.
func insertLogWithParams(dbClient *vdbClient, action string, vreplID int32, params map[string]string) {
var message string
if params != nil {
obj, _ := json.Marshal(params)
message = string(obj)
}
if err := insertLog(dbClient, action, vreplID, params["state"], message); err != nil {
return err
}
return nil
insertLog(dbClient, action, vreplID, params["state"], message)
}

// isUnrecoverableError returns true if vreplication cannot recover from the given error and should completely terminate
// isUnrecoverableError returns true if vreplication cannot recover from the given error and should completely terminate.
func isUnrecoverableError(err error) bool {
if err == nil {
return false
Expand Down
99 changes: 99 additions & 0 deletions go/vt/vttablet/tabletmanager/vreplication/utils_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
/*
Copyright 2024 The Vitess Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package vreplication

import (
"fmt"
"strings"
"testing"

"github.com/stretchr/testify/require"

"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/binlog/binlogplayer"

binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata"
)

func TestInsertLogTruncation(t *testing.T) {
dbClient := binlogplayer.NewMockDBClient(t)
defer dbClient.Close()
dbClient.RemoveInvariant("insert into _vt.vreplication_log") // Otherwise the insert will be ignored
stats := binlogplayer.NewStats()
defer stats.Stop()
vdbClient := newVDBClient(dbClient, stats)
defer vdbClient.Close()
vrID := int32(1)
typ := "Testing"
state := binlogdatapb.VReplicationWorkflowState_Error.String()

insertStmtf := "insert into _vt.vreplication_log(vrepl_id, type, state, message) values(%d, '%s', '%s', %s)"

tests := []struct {
message string
expectTruncation bool
}{
{
message: "Simple message that's not truncated",
},
{
message: "Simple message that needs to be truncated " + strings.Repeat("a", 80000) + " cuz it's long",
expectTruncation: true,
},
{
message: "Simple message that doesn't need to be truncated " + strings.Repeat("b", 64000) + " cuz it's not quite too long",
},
{
message: "Message that is just barely short enough " + strings.Repeat("c", maxVReplicationLogMessageLen-(len("Message that is just barely short enough ")+len(" so it doesn't get truncated"))) + " so it doesn't get truncated",
},
{
message: "Message that is just barely too long " + strings.Repeat("d", maxVReplicationLogMessageLen-(len("Message that is just barely too long ")+len(" so it gets truncated"))+1) + " so it gets truncated",
expectTruncation: true,
},
{
message: "Super long message brosef wut r ya doin " + strings.Repeat("e", 60000) + strings.Repeat("f", 60000) + " so maybe don't do that to yourself and your friends",
expectTruncation: true,
},
{
message: "Super duper long message brosef wut r ya doin " + strings.Repeat("g", 120602) + strings.Repeat("h", 120001) + " so maybe really don't do that to yourself and your friends",
expectTruncation: true,
},
}
for _, tc := range tests {
t.Run("insertLog", func(t *testing.T) {
var messageOut string
if tc.expectTruncation {
mid := (len(tc.message) / 2) - len(vrepliationLogTruncationStr)
for mid > (maxVReplicationLogMessageLen / 2) {
mid = mid / 2
}
tail := (len(tc.message) - (mid + len(vrepliationLogTruncationStr))) + 1
messageOut = fmt.Sprintf("%s%s%s", tc.message[:mid], vrepliationLogTruncationStr, tc.message[tail:])
require.True(t, strings.HasPrefix(messageOut, tc.message[:1024])) // Confirm we still have the same beginning
require.True(t, strings.HasSuffix(messageOut, tc.message[len(tc.message)-1024:])) // Confirm we still have the same end
require.True(t, strings.Contains(messageOut, vrepliationLogTruncationStr)) // Confirm we have the truncation text
t.Logf("Original message length: %d, truncated message length: %d", len(tc.message), len(messageOut))
} else {
messageOut = tc.message
}
require.LessOrEqual(t, len(messageOut), maxVReplicationLogMessageLen)
dbClient.ExpectRequest(fmt.Sprintf(insertStmtf, vrID, typ, state, encodeString(messageOut)), &sqltypes.Result{}, nil)
insertLog(vdbClient, typ, vrID, state, tc.message)
dbClient.Wait()
})
}
}
16 changes: 4 additions & 12 deletions go/vt/vttablet/tabletmanager/vreplication/vcopier.go
Original file line number Diff line number Diff line change
Expand Up @@ -246,31 +246,23 @@
if err := vc.vr.setState(binlogdatapb.VReplicationWorkflowState_Copying, ""); err != nil {
return err
}
if err := vc.vr.insertLog(LogCopyStart, fmt.Sprintf("Copy phase started for %d table(s)",
len(plan.TargetTables))); err != nil {
return err
}
vc.vr.insertLog(LogCopyStart, fmt.Sprintf("Copy phase started for %d table(s)", len(plan.TargetTables)))

if vc.vr.supportsDeferredSecondaryKeys() {
settings, err := binlogplayer.ReadVRSettings(vc.vr.dbClient, vc.vr.id)
if err != nil {
return err
}
if settings.DeferSecondaryKeys {
if err := vc.vr.insertLog(LogCopyStart, fmt.Sprintf("Copy phase temporarily dropping secondary keys for %d table(s)",
len(plan.TargetTables))); err != nil {
return err
}
vc.vr.insertLog(LogCopyStart, fmt.Sprintf("Copy phase temporarily dropping secondary keys for %d table(s)", len(plan.TargetTables)))

Check warning on line 257 in go/vt/vttablet/tabletmanager/vreplication/vcopier.go

View check run for this annotation

Codecov / codecov/patch

go/vt/vttablet/tabletmanager/vreplication/vcopier.go#L257

Added line #L257 was not covered by tests
for _, tableName := range tableNames {
if err := vc.vr.stashSecondaryKeys(ctx, tableName); err != nil {
return err
}
}
if err := vc.vr.insertLog(LogCopyStart,
vc.vr.insertLog(LogCopyStart,

Check warning on line 263 in go/vt/vttablet/tabletmanager/vreplication/vcopier.go

View check run for this annotation

Codecov / codecov/patch

go/vt/vttablet/tabletmanager/vreplication/vcopier.go#L263

Added line #L263 was not covered by tests
fmt.Sprintf("Copy phase finished dropping secondary keys and saving post copy actions to restore them for %d table(s)",
len(plan.TargetTables))); err != nil {
return err
}
len(plan.TargetTables)))

Check warning on line 265 in go/vt/vttablet/tabletmanager/vreplication/vcopier.go

View check run for this annotation

Codecov / codecov/patch

go/vt/vttablet/tabletmanager/vreplication/vcopier.go#L265

Added line #L265 was not covered by tests
}
}
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2799,8 +2799,7 @@ func TestVReplicationLogs(t *testing.T) {

for _, want := range expected {
t.Run("", func(t *testing.T) {
err = insertLog(vdbc, LogMessage, 1, binlogdatapb.VReplicationWorkflowState_Running.String(), "message1")
require.NoError(t, err)
insertLog(vdbc, LogMessage, 1, binlogdatapb.VReplicationWorkflowState_Running.String(), "message1")
qr, err := env.Mysqld.FetchSuperQuery(context.Background(), query)
require.NoError(t, err)
require.Equal(t, want, fmt.Sprintf("%v", qr.Rows))
Expand Down
26 changes: 7 additions & 19 deletions go/vt/vttablet/tabletmanager/vreplication/vreplicator.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,7 @@ func (vr *vreplicator) replicate(ctx context.Context) error {
return err
}
if numTablesToCopy == 0 {
if err := vr.insertLog(LogCopyEnd, fmt.Sprintf("Copy phase completed at gtid %s", settings.StartPos)); err != nil {
return err
}
vr.insertLog(LogCopyEnd, fmt.Sprintf("Copy phase completed at gtid %s", settings.StartPos))
}
}
case settings.StartPos.IsZero():
Expand Down Expand Up @@ -464,14 +462,12 @@ func (vr *vreplicator) setMessage(message string) error {
if _, err := vr.dbClient.Execute(query); err != nil {
return fmt.Errorf("could not set message: %v: %v", query, err)
}
if err := insertLog(vr.dbClient, LogMessage, vr.id, vr.state.String(), message); err != nil {
return err
}
insertLog(vr.dbClient, LogMessage, vr.id, vr.state.String(), message)
return nil
}

func (vr *vreplicator) insertLog(typ, message string) error {
return insertLog(vr.dbClient, typ, vr.id, vr.state.String(), message)
func (vr *vreplicator) insertLog(typ, message string) {
insertLog(vr.dbClient, typ, vr.id, vr.state.String(), message)
}

func (vr *vreplicator) setState(state binlogdatapb.VReplicationWorkflowState, message string) error {
Expand All @@ -489,9 +485,7 @@ func (vr *vreplicator) setState(state binlogdatapb.VReplicationWorkflowState, me
if state == vr.state {
return nil
}
if err := insertLog(vr.dbClient, LogStateChange, vr.id, state.String(), message); err != nil {
return err
}
insertLog(vr.dbClient, LogStateChange, vr.id, state.String(), message)
vr.state = state

return nil
Expand Down Expand Up @@ -815,10 +809,7 @@ func (vr *vreplicator) execPostCopyActions(ctx context.Context, tableName string
return nil
}

if err := vr.insertLog(LogCopyStart, fmt.Sprintf("Executing %d post copy action(s) for %s table",
len(qr.Rows), tableName)); err != nil {
return err
}
vr.insertLog(LogCopyStart, fmt.Sprintf("Executing %d post copy action(s) for %s table", len(qr.Rows), tableName))

// Save our connection ID so we can use it to easily KILL any
// running SQL action we may perform later if needed.
Expand Down Expand Up @@ -1039,10 +1030,7 @@ func (vr *vreplicator) execPostCopyActions(ctx context.Context, tableName string
}
}

if err := vr.insertLog(LogCopyStart, fmt.Sprintf("Completed all post copy actions for %s table",
tableName)); err != nil {
return err
}
vr.insertLog(LogCopyStart, fmt.Sprintf("Completed all post copy actions for %s table", tableName))

return nil
}
Expand Down
Loading