Skip to content

Commit

Permalink
Merge branch 'slack-19.0' into patch-16304-v19
Browse files (browse the repository at this point in the history)
  • Loading branch information
timvaillancourt authored Sep 18, 2024
2 parents 974564a + 46a46f5 commit 56e900b
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 4 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ require (
github.com/kr/text v0.2.0
github.com/mitchellh/mapstructure v1.5.0
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249
- github.com/slackhq/vitess-addons v0.19.0
+ github.com/slackhq/vitess-addons v0.19.1
github.com/slok/noglog v0.2.0
github.com/spf13/afero v1.11.0
github.com/spf13/jwalterweatherman v1.1.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -450,8 +450,8 @@ github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6Mwd
github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
github.com/sjmudd/stopwatch v0.1.1 h1:x45OvxFB5OtCkjvYtzRF5fWB857Jzjjk84Oyd5C5ebw=
github.com/sjmudd/stopwatch v0.1.1/go.mod h1:BLw0oIQJ1YLXBO/q9ufK/SgnKBVIkC2qrm6uy78Zw6U=
- github.com/slackhq/vitess-addons v0.19.0 h1:+dWkQENsu8YYgsKesOKWqb3+vj66OY1WMvYOn9lmZ+I=
- github.com/slackhq/vitess-addons v0.19.0/go.mod h1:E7i+cxyIY+I4An/JAvalQ9Ze2MjKlEx0u2nFXE4fgR0=
+ github.com/slackhq/vitess-addons v0.19.1 h1:k8f8pAJ2zqtetN+dnehAs7DFcZnI9IQRSL18ZMwNRCw=
+ github.com/slackhq/vitess-addons v0.19.1/go.mod h1:ZMzBBtadSA1MEuNIfZerztxLMhRFO+tmBZxv5HuV4lE=
github.com/slok/noglog v0.2.0 h1:1czu4l2EoJ8L92UwdSXXa1Y+c5TIjFAFm2P+mjej95E=
github.com/slok/noglog v0.2.0/go.mod h1:TfKxwpEZPT+UA83bQ6RME146k0MM4e8mwHLf6bhcGDI=
github.com/smartystreets/assertions v0.0.0-20190116191733-b6c0e53d7304/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc=
Expand Down
17 changes: 16 additions & 1 deletion go/vt/vtorc/logic/topology_recovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ import (
"encoding/json"
"fmt"
"math/rand"
"os"
"time"

"github.com/patrickmn/go-cache"
"github.com/slackhq/vitess-addons/go/external"

"vitess.io/vitess/go/stats"
"vitess.io/vitess/go/vt/log"
Expand Down Expand Up @@ -81,6 +83,9 @@ var (

// recoveriesFailureCounter counts the number of failed recoveries that VTOrc has performed
recoveriesFailureCounter = stats.NewCountersWithSingleLabel("FailedRecoveries", "Count of the different failed recoveries performed", "RecoveryType", actionableRecoveriesNames...)

vtopsExec = external.NewExecVTOps(os.Getenv("VTOPS_PATH"), os.Getenv("VTOPS_HTTP_PROXY"), "vtorc", os.Getenv("HOSTNAME"))
vtopsSlackChannel = os.Getenv("SLACK_CHANNEL")
)

// recoveryFunction is the code of the recovery function to be used
Expand Down Expand Up @@ -297,6 +302,7 @@ func postErsCompletion(topologyRecovery *TopologyRecovery, analysisEntry *inst.R
_ = AuditTopologyRecovery(topologyRecovery, message)
_ = inst.AuditOperation(recoveryName, analysisEntry.AnalyzedInstanceAlias, message)
_ = AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("%v: successfully promoted %+v", recoveryName, promotedReplica.InstanceAlias))
vtopsExec.RaiseProblem(analysisEntry.AnalyzedInstanceHostname, "orc-dead-tablet", true)
}
}

Expand Down Expand Up @@ -590,7 +596,6 @@ func runEmergentOperations(analysisEntry *inst.ReplicationAnalysis) {
func executeCheckAndRecoverFunction(analysisEntry *inst.ReplicationAnalysis) (err error) {
countPendingRecoveries.Add(1)
defer countPendingRecoveries.Add(-1)

checkAndRecoverFunctionCode := getCheckAndRecoverFunctionCode(analysisEntry.Analysis, analysisEntry.AnalyzedInstanceAlias)
isActionableRecovery := hasActionableRecovery(checkAndRecoverFunctionCode)
analysisEntry.IsActionableRecovery = isActionableRecovery
Expand All @@ -607,6 +612,7 @@ func executeCheckAndRecoverFunction(analysisEntry *inst.ReplicationAnalysis) (er

return nil
}

// we have a recovery function; its execution still depends on filters if not disabled.
if isActionableRecovery || util.ClearToLog("executeCheckAndRecoverFunction: detection", analysisEntry.AnalyzedInstanceAlias) {
log.Infof("executeCheckAndRecoverFunction: proceeding with %+v detection on %+v; isActionable?: %+v", analysisEntry.Analysis, analysisEntry.AnalyzedInstanceAlias, isActionableRecovery)
Expand Down Expand Up @@ -707,15 +713,23 @@ func executeCheckAndRecoverFunction(analysisEntry *inst.ReplicationAnalysis) (er
if isActionableRecovery || util.ClearToLog("executeCheckAndRecoverFunction: recovery", analysisEntry.AnalyzedInstanceAlias) {
log.Infof("executeCheckAndRecoverFunction: proceeding with %+v recovery on %+v; isRecoverable?: %+v", analysisEntry.Analysis, analysisEntry.AnalyzedInstanceAlias, isActionableRecovery)
}

recoveryAttempted, topologyRecovery, err := getCheckAndRecoverFunction(checkAndRecoverFunctionCode)(ctx, analysisEntry)
if !recoveryAttempted {
log.Infof("No recovery attempted on %s for problem %s.", analysisEntry.AnalyzedInstanceHostname, analysisEntry.Analysis)
return err
}
recoveryName := getRecoverFunctionName(checkAndRecoverFunctionCode)
recoveriesCounter.Add(recoveryName, 1)
if err != nil {
message := fmt.Sprintf("Recovery failed on %s for problem %s. Error: %s", analysisEntry.AnalyzedInstanceHostname, analysisEntry.Analysis, err.Error())
log.Info(message)
vtopsExec.SendSlackMessage(message, vtopsSlackChannel, true)
recoveriesFailureCounter.Add(recoveryName, 1)
} else {
message := fmt.Sprintf("Recovery succeeded on %s for problem %s.", analysisEntry.AnalyzedInstanceHostname, analysisEntry.Analysis)
log.Info(message)
vtopsExec.SendSlackMessage(message, vtopsSlackChannel, true)
recoveriesSuccessfulCounter.Add(recoveryName, 1)
}
if topologyRecovery == nil {
Expand Down Expand Up @@ -813,6 +827,7 @@ func postPrsCompletion(topologyRecovery *TopologyRecovery, analysisEntry *inst.R
_ = AuditTopologyRecovery(topologyRecovery, message)
_ = inst.AuditOperation(string(analysisEntry.Analysis), analysisEntry.AnalyzedInstanceAlias, message)
_ = AuditTopologyRecovery(topologyRecovery, fmt.Sprintf("%+v: successfully promoted %+v", analysisEntry.Analysis, promotedReplica.InstanceAlias))
vtopsExec.RaiseProblem(analysisEntry.AnalyzedInstanceHostname, "orc-dead-tablet", true)
}
}

Expand Down

0 comments on commit 56e900b

Please sign in to comment.