Skip to content

Commit

Permalink
[Heartbeat] Retry on no previous state (#36842)
Browse files Browse the repository at this point in the history
* Add empty state to retry cond

* Remove unwanted test

* Add changelog

* address review

---------

Co-authored-by: vigneshshanmugam <[email protected]>
  • Loading branch information
emilioalvap and vigneshshanmugam authored Oct 13, 2023
1 parent 8a70712 commit df4d550
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 136 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ is collected by it.
*Heartbeat*

- Fix panics when parsing dereferencing invalid parsed url. {pull}34702[34702]
- Fix retries to trigger on a down monitor with no previous state. {pull}36842[36842]

*Metricbeat*

Expand Down
6 changes: 3 additions & 3 deletions heartbeat/monitors/wrappers/summarizer/plugstatestat.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,12 +150,12 @@ func (ssp *commonSSP) BeforeSummary(event *beat.Event) BeforeSummaryActions {
lastStatus := ssp.stateTracker.GetCurrentStatus(ssp.sf)

curCheckDown := ssp.js.Status == monitorstate.StatusDown
lastStateUp := ssp.stateTracker.GetCurrentStatus(ssp.sf) == monitorstate.StatusUp
lastStateUpOrEmpty := lastStatus == monitorstate.StatusUp || lastStatus == monitorstate.StatusEmpty
hasAttemptsRemaining := ssp.js.Attempt < ssp.js.MaxAttempts

// retry if...
retry := curCheckDown && // the current check is down
lastStateUp && // we were previously up, if we were previously down we just check once
lastStateUpOrEmpty && // we were previously up or had no previous state, if we were previously down we just check once
hasAttemptsRemaining // and we are configured to actually make multiple attempts
// if we aren't retrying this is the final attempt
ssp.js.FinalAttempt = !retry
Expand All @@ -174,7 +174,7 @@ func (ssp *commonSSP) BeforeSummary(event *beat.Event) BeforeSummaryActions {

eventext.MergeEventFields(event, fields)

logp.L().Debugf("attempt info: current(%v) == lastStatus(%v) && attempts(%d < %d)", ssp.js.Status, lastStatus, ssp.js.Attempt, ssp.js.MaxAttempts)
logp.L().Infof("attempt info: current(%v) == lastStatus(%v) && attempts(%d < %d)", ssp.js.Status, lastStatus, ssp.js.Attempt, ssp.js.MaxAttempts)

if retry {
return RetryBeforeSummary
Expand Down
10 changes: 5 additions & 5 deletions heartbeat/monitors/wrappers/summarizer/summarizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ func TestSummarizer(t *testing.T) {
{
"start down, transition to up",
2,
"du",
"du",
"11",
2,
"duu",
"duu",
"121",
3,
testURL,
},
{
Expand All @@ -80,7 +80,7 @@ func TestSummarizer(t *testing.T) {
2,
"dddddddd",
"dddddddd",
"11111111",
"12111111",
8,
testURL,
},
Expand Down
128 changes: 0 additions & 128 deletions heartbeat/monitors/wrappers/wrappers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ import (
"github.com/elastic/beats/v7/heartbeat/monitors/jobs"
"github.com/elastic/beats/v7/heartbeat/monitors/logger"
"github.com/elastic/beats/v7/heartbeat/monitors/stdfields"
"github.com/elastic/beats/v7/heartbeat/monitors/wrappers/monitorstate"
"github.com/elastic/beats/v7/heartbeat/monitors/wrappers/summarizer/jobsummary"
"github.com/elastic/beats/v7/heartbeat/monitors/wrappers/summarizer/summarizertesthelper"
"github.com/elastic/beats/v7/heartbeat/monitors/wrappers/wraputil"
"github.com/elastic/beats/v7/heartbeat/scheduler/schedule"
Expand Down Expand Up @@ -352,132 +350,6 @@ func TestMultiJobConts(t *testing.T) {
})
}

// TestRetryMultiCont is of somewhat dubious utility at the moment,
// it mostly tests that we __don't__ retry on an initial down.
// retry logic is better and more completely tested in the summarizer
// and scenario tests.
func TestRetryMultiCont(t *testing.T) {
uniqScope := isdef.ScopedIsUnique()

expected := []struct {
monStatus string
js jobsummary.JobSummary
state monitorstate.State
}{
{
"down",
jobsummary.JobSummary{
Status: "down",
FinalAttempt: true,
// we expect two up since this is a lightweight
// job and all events get a monitor status
// since no errors are returned that's 2
Up: 0,
Down: 2,
Attempt: 1,
MaxAttempts: 2,
},
monitorstate.State{
Status: "down",
Up: 0,
Down: 2,
Checks: 2,
},
},
{
"down",
jobsummary.JobSummary{
Status: "down",
FinalAttempt: true,
Up: 0,
Down: 2,
Attempt: 2,
MaxAttempts: 2,
},
monitorstate.State{
Status: "down",
Up: 0,
Down: 2,
Checks: 2,
},
},
}

jobErr := fmt.Errorf("down")

makeContJob := func(t *testing.T, u string) jobs.Job {
expIdx := 0
return func(event *beat.Event) ([]jobs.Job, error) {
eventext.MergeEventFields(event, mapstr.M{"cont": "1st"})
u, err := url.Parse(u)
require.NoError(t, err)
eventext.MergeEventFields(event, mapstr.M{"url": wraputil.URLFields(u)})

return []jobs.Job{
func(event *beat.Event) ([]jobs.Job, error) {
eventext.MergeEventFields(event, mapstr.M{"cont": "2nd"})
eventext.MergeEventFields(event, mapstr.M{"url": wraputil.URLFields(u)})

expIdx++
if expIdx >= len(expected)-1 {
expIdx = 0
}
exp := expected[expIdx]
if exp.js.Status == "down" {
return nil, jobErr
}

return nil, nil
},
}, jobErr
}
}

contJobValidator := func(u string, msg string) validator.Validator {
return lookslike.Compose(
urlValidator(t, u),
hbtestllext.MaybeHasEventType,
lookslike.MustCompile(map[string]interface{}{"cont": msg}),
lookslike.MustCompile(map[string]interface{}{
"error": map[string]interface{}{
"message": isdef.IsString,
"type": isdef.IsString,
},
"monitor": map[string]interface{}{
"id": uniqScope.IsUniqueTo(u),
"name": testMonFields.Name,
"type": testMonFields.Type,
"status": "down",
"check_group": uniqScope.IsUniqueTo(u),
},
"state": isdef.Optional(hbtestllext.IsMonitorState),
}),
hbtestllext.MonitorTimespanValidator,
)
}

retryMonFields := testMonFields
retryMonFields.MaxAttempts = 2

for _, expected := range expected {
testCommonWrap(t, testDef{
"multi-job-continuations-retry",
retryMonFields,
[]jobs.Job{makeContJob(t, "http://foo.com")},
[]validator.Validator{
contJobValidator("http://foo.com", "1st"),
lookslike.Compose(
contJobValidator("http://foo.com", "2nd"),
summarizertesthelper.SummaryValidator(expected.js.Up, expected.js.Down),
hbtestllext.MaybeHasDuration,
),
},
nil,
nil,
})
}
}

func TestMultiJobContsCancelledEvents(t *testing.T) {
uniqScope := isdef.ScopedIsUnique()

Expand Down

0 comments on commit df4d550

Please sign in to comment.