Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix releasing the global read lock when mysqlshell backup fails #17000

Merged
merged 3 commits into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions go/ioutil/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ wrappers around WriteCloser and Writer.
package ioutil

import (
"bytes"
"io"
"time"
)
Expand Down Expand Up @@ -87,3 +88,16 @@ func NewMeteredWriter(tw io.Writer, fns ...func(int, time.Duration)) MeteredWrit
func (tw *meteredWriter) Write(p []byte) (int, error) {
return tw.meter.measure(tw.Writer.Write, p)
}

// MemoryBuffer implements io.WriteCloser using an in-memory buffer.
type MemoryBuffer struct {
mattlord marked this conversation as resolved.
Show resolved Hide resolved
*bytes.Buffer
}

func (m MemoryBuffer) Close() error {
return nil
}

func NewMemoryBuffer() MemoryBuffer {
return MemoryBuffer{bytes.NewBuffer(nil)}
}
17 changes: 15 additions & 2 deletions go/vt/mysqlctl/fakemysqldaemon.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ type FakeMysqlDaemon struct {
// SemiSyncReplicaEnabled represents the state of rpl_semi_sync_replica_enabled.
SemiSyncReplicaEnabled bool

// GlobalReadLock is used to test if a lock has been acquired already or not
GlobalReadLock bool

// TimeoutHook is a func that can be called at the beginning of
// any method to fake a timeout.
// All a test needs to do is make it { return context.DeadlineExceeded }.
Expand Down Expand Up @@ -772,10 +775,20 @@ func (fmd *FakeMysqlDaemon) HostMetrics(ctx context.Context, cnf *Mycnf) (*mysql

// AcquireGlobalReadLock is part of the MysqlDaemon interface.
func (fmd *FakeMysqlDaemon) AcquireGlobalReadLock(ctx context.Context) error {
return errors.New("not implemented")
if fmd.GlobalReadLock {
return errors.New("lock already acquired")
}

fmd.GlobalReadLock = true
return nil
}

// ReleaseGlobalReadLock is part of the MysqlDaemon interface.
func (fmd *FakeMysqlDaemon) ReleaseGlobalReadLock(ctx context.Context) error {
return errors.New("not implemented")
if fmd.GlobalReadLock {
fmd.GlobalReadLock = false
return nil
}

return errors.New("no read locks acquired yet")
}
15 changes: 13 additions & 2 deletions go/vt/mysqlctl/mysqlshellbackupengine.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ var (
// disable redo logging and double write buffer
mysqlShellSpeedUpRestore = false

mysqlShellBackupBinaryName = "mysqlsh"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit, but any reason for this not to remain a const?

Looks like it's only changed for the test, the value used is passed in as a parameter to generate the new file:

generateTestFile(t, mysqlShellBackupBinaryName, "#!/bin/bash\nexit 1")

And we've told the engine to use this other file instead of mysqlsh. This is OK, but couldn't we instead set and unset a shell alias for mysqlsh so that the shell calls our test script? It feels a little less bad than allowing the binary name to be changed although it's the same effect.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, this is only changed for the test. I am not sure setting an alias would work, as the exec documentation says:

Unlike the "system" library call from C and other languages, the os/exec package intentionally does not invoke the system shell and does not expand any glob patterns or handle other expansions, pipelines, or redirections typically done by shells.

I would imagine the above also means that setting an alias wouldn't work


// use when checking if we need to create the directory on the local filesystem or not.
knownObjectStoreParams = []string{"s3BucketName", "osBucketName", "azureContainerName"}

Expand Down Expand Up @@ -107,8 +109,8 @@ type MySQLShellBackupEngine struct {
}

const (
mysqlShellBackupBinaryName = "mysqlsh"
mysqlShellBackupEngineName = "mysqlshell"
mysqlShellLockMessage = "Global read lock has been released"
)

func (be *MySQLShellBackupEngine) ExecuteBackup(ctx context.Context, params BackupParams, bh backupstorage.BackupHandle) (result BackupResult, finalErr error) {
Expand Down Expand Up @@ -152,6 +154,10 @@ func (be *MySQLShellBackupEngine) ExecuteBackup(ctx context.Context, params Back
}
lockAcquired := time.Now() // we will report how long we hold the lock for

// we need to release the global read lock in case the backup fails to start and
// the lock wasn't released by releaseReadLock() yet
defer func() { _ = params.Mysqld.ReleaseGlobalReadLock(ctx) }()
mattlord marked this conversation as resolved.
Show resolved Hide resolved

posBeforeBackup, err := params.Mysqld.PrimaryPosition(ctx)
if err != nil {
return BackupUnusable, vterrors.Wrap(err, "failed to fetch position")
Expand Down Expand Up @@ -184,6 +190,7 @@ func (be *MySQLShellBackupEngine) ExecuteBackup(ctx context.Context, params Back

// Get exit status.
if err := cmd.Wait(); err != nil {
pipeWriter.Close() // make sure we close the writer so the goroutines above will complete.
mattlord marked this conversation as resolved.
Show resolved Hide resolved
return BackupUnusable, vterrors.Wrap(err, mysqlShellBackupEngineName+" failed")
}

Expand Down Expand Up @@ -503,7 +510,7 @@ func releaseReadLock(ctx context.Context, reader io.Reader, params BackupParams,

if !released {

if !strings.Contains(line, "Global read lock has been released") {
if !strings.Contains(line, mysqlShellLockMessage) {
continue
}
released = true
Expand All @@ -521,6 +528,10 @@ func releaseReadLock(ctx context.Context, reader io.Reader, params BackupParams,
if err := scanner.Err(); err != nil {
params.Logger.Errorf("error reading from reader: %v", err)
}

if !released {
params.Logger.Errorf("could not release global lock earlier")
}
}

func cleanupMySQL(ctx context.Context, params RestoreParams, shouldDeleteUsers bool) error {
Expand Down
134 changes: 134 additions & 0 deletions go/vt/mysqlctl/mysqlshellbackupengine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,17 @@ package mysqlctl

import (
"context"
"encoding/json"
"fmt"
"os"
"path"
"strings"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"vitess.io/vitess/go/ioutil"
"vitess.io/vitess/go/mysql/fakesqldb"
"vitess.io/vitess/go/sqltypes"
"vitess.io/vitess/go/vt/logutil"
Expand Down Expand Up @@ -307,3 +311,133 @@ func TestCleanupMySQL(t *testing.T) {
}

}

// this is a helper to write files in a temporary directory
func generateTestFile(t *testing.T, name, contents string) {
f, err := os.OpenFile(name, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0700)
require.NoError(t, err)
defer f.Close()
_, err = f.WriteString(contents)
require.NoError(t, err)
require.NoError(t, f.Close())
}

// This tests if we are properly releasing the global read lock we acquire
// during ExecuteBackup(), even if the backup didn't succeed.
func TestMySQLShellBackupEngine_ExecuteBackup_ReleaseLock(t *testing.T) {
originalLocation := mysqlShellBackupLocation
originalBinary := mysqlShellBackupBinaryName
mysqlShellBackupLocation = "logical"
mysqlShellBackupBinaryName = path.Join(t.TempDir(), "test.sh")

defer func() { // restore the original values.
mysqlShellBackupLocation = originalLocation
mysqlShellBackupBinaryName = originalBinary
}()

fakedb := fakesqldb.New(t)
defer fakedb.Close()
mysql := NewFakeMysqlDaemon(fakedb)
defer mysql.Close()

be := &MySQLShellBackupEngine{}
params := BackupParams{
TabletAlias: "test",
Mysqld: mysql,
}
bs := FakeBackupStorage{
StartBackupReturn: FakeBackupStorageStartBackupReturn{},
}

t.Run("lock released if we see the mysqlsh lock being acquired", func(t *testing.T) {
logger := logutil.NewMemoryLogger()
params.Logger = logger
manifestBuffer := ioutil.NewMemoryBuffer()
bs.StartBackupReturn.BackupHandle = &FakeBackupHandle{
Dir: t.TempDir(),
AddFileReturn: FakeBackupHandleAddFileReturn{WriteCloser: manifestBuffer},
}

// this simulates mysql shell completing without any issues.
generateTestFile(t, mysqlShellBackupBinaryName, fmt.Sprintf("#!/bin/bash\n>&2 echo %s", mysqlShellLockMessage))

bh, err := bs.StartBackup(context.Background(), t.TempDir(), t.Name())
require.NoError(t, err)

_, err = be.ExecuteBackup(context.Background(), params, bh)
require.NoError(t, err)
require.False(t, mysql.GlobalReadLock) // lock must be released.

// check the manifest is valid.
var manifest MySQLShellBackupManifest
err = json.Unmarshal(manifestBuffer.Bytes(), &manifest)
require.NoError(t, err)

require.Equal(t, mysqlShellBackupEngineName, manifest.BackupMethod)

// did we notice the lock was release and did we release it ours as well?
errorLogged := false
for _, event := range logger.Events {
if strings.Contains(event.Value, "global read lock released after") {
errorLogged = true
}
}

assert.True(t, errorLogged, "failed to release the global lock after mysqlsh")
mattlord marked this conversation as resolved.
Show resolved Hide resolved
})

t.Run("lock released if when we don't see mysqlsh released it", func(t *testing.T) {
mysql.GlobalReadLock = false // clear lock status.
logger := logutil.NewMemoryLogger()
mattlord marked this conversation as resolved.
Show resolved Hide resolved
params.Logger = logger
manifestBuffer := ioutil.NewMemoryBuffer()
bs.StartBackupReturn.BackupHandle = &FakeBackupHandle{
Dir: t.TempDir(),
AddFileReturn: FakeBackupHandleAddFileReturn{WriteCloser: manifestBuffer},
}

// this simulates mysqlshell completing, but we don't see the message that is released its lock.
generateTestFile(t, mysqlShellBackupBinaryName, "#!/bin/bash\nexit 0")

bh, err := bs.StartBackup(context.Background(), t.TempDir(), t.Name())
require.NoError(t, err)

// in this case the backup was successful, but even if we didn't see mysqlsh release its lock
// we make sure it is released at the end.
_, err = be.ExecuteBackup(context.Background(), params, bh)
require.NoError(t, err)
require.False(t, mysql.GlobalReadLock) // lock must be released.

// make sure we are at least logging the lock wasn't able to be released earlier.
errorLogged := false
for _, event := range logger.Events {
if strings.Contains(event.Value, "could not release global lock earlier") {
errorLogged = true
}
}

assert.True(t, errorLogged, "failed to log error message when unable to release lock during backup")
})

t.Run("lock released when backup fails", func(t *testing.T) {
mysql.GlobalReadLock = false // clear lock status.
logger := logutil.NewMemoryLogger()
params.Logger = logger
manifestBuffer := ioutil.NewMemoryBuffer()
bs.StartBackupReturn.BackupHandle = &FakeBackupHandle{
Dir: t.TempDir(),
AddFileReturn: FakeBackupHandleAddFileReturn{WriteCloser: manifestBuffer},
}

// this simulates the backup process failing.
generateTestFile(t, mysqlShellBackupBinaryName, "#!/bin/bash\nexit 1")

bh, err := bs.StartBackup(context.Background(), t.TempDir(), t.Name())
require.NoError(t, err)

_, err = be.ExecuteBackup(context.Background(), params, bh)
require.ErrorContains(t, err, "mysqlshell failed")
require.False(t, mysql.GlobalReadLock) // lock must be released.
})

}
Loading