[Heartbeat] Retry on no previous state #36842

Merged: 4 commits, Oct 13, 2023
CHANGELOG.next.asciidoc (1 addition, 0 deletions)
@@ -114,6 +114,7 @@ is collected by it.
*Heartbeat*

- Fix panics when parsing dereferencing invalid parsed url. {pull}34702[34702]
+ - Fix retries to trigger on a down monitor with no previous state. {pull}36842[36842]

*Metricbeat*

heartbeat/monitors/wrappers/summarizer/plugstatestat.go (3 additions, 3 deletions)
@@ -150,12 +150,12 @@ func (ssp *commonSSP) BeforeSummary(event *beat.Event) BeforeSummaryActions {
lastStatus := ssp.stateTracker.GetCurrentStatus(ssp.sf)

curCheckDown := ssp.js.Status == monitorstate.StatusDown
- lastStateUp := ssp.stateTracker.GetCurrentStatus(ssp.sf) == monitorstate.StatusUp
+ lastStateUpOrEmpty := lastStatus == monitorstate.StatusUp || lastStatus == monitorstate.StatusEmpty
hasAttemptsRemaining := ssp.js.Attempt < ssp.js.MaxAttempts

// retry if...
retry := curCheckDown && // the current check is down
- lastStateUp && // we were previously up, if we were previously down we just check once
+ lastStateUpOrEmpty && // we were previously up or had no previous state, if we were previously down we just check once
hasAttemptsRemaining // and we are configured to actually make multiple attempts
// if we aren't retrying this is the final attempt
ssp.js.FinalAttempt = !retry
@@ -174,7 +174,7 @@ func (ssp *commonSSP) BeforeSummary(event *beat.Event) BeforeSummaryActions {

eventext.MergeEventFields(event, fields)

- logp.L().Debugf("attempt info: current(%v) == lastStatus(%v) && attempts(%d < %d)", ssp.js.Status, lastStatus, ssp.js.Attempt, ssp.js.MaxAttempts)
+ logp.L().Infof("attempt info: current(%v) == lastStatus(%v) && attempts(%d < %d)", ssp.js.Status, lastStatus, ssp.js.Attempt, ssp.js.MaxAttempts)

if retry {
return RetryBeforeSummary
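The hunk above is the core of the fix: a down check is now retried when the previous state was up or when there is no previous state at all, instead of only when it was up. Below is a minimal standalone sketch of that decision, with simplified types standing in for commonSSP, the job summary, and the monitorstate package; the empty-status value here is a stand-in for whatever the real package defines.

package main

import "fmt"

type Status string

const (
	StatusUp    Status = "up"
	StatusDown  Status = "down"
	StatusEmpty Status = "" // stand-in for "no previous state recorded"
)

// shouldRetry mirrors the updated condition in BeforeSummary: retry a down
// check when the last known state was up OR when there is no previous state,
// and only while attempts remain.
func shouldRetry(current, last Status, attempt, maxAttempts int) bool {
	curCheckDown := current == StatusDown
	lastStateUpOrEmpty := last == StatusUp || last == StatusEmpty
	hasAttemptsRemaining := attempt < maxAttempts
	return curCheckDown && lastStateUpOrEmpty && hasAttemptsRemaining
}

func main() {
	// A brand-new monitor that starts down previously got no retry; with the
	// empty-state case included it now gets a second attempt.
	fmt.Println(shouldRetry(StatusDown, StatusEmpty, 1, 2)) // true
	fmt.Println(shouldRetry(StatusDown, StatusDown, 1, 2))  // false: an already-down monitor is checked once
	fmt.Println(shouldRetry(StatusDown, StatusUp, 2, 2))    // false: attempts exhausted
}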
heartbeat/monitors/wrappers/summarizer/summarizer_test.go (5 additions, 5 deletions)
@@ -60,10 +60,10 @@ func TestSummarizer(t *testing.T) {
{
"start down, transition to up",
2,
"du",
"du",
"11",
2,
"duu",
"duu",
"121",
3,
testURL,
},
{
@@ -80,7 +80,7 @@ func TestSummarizer(t *testing.T) {
2,
"dddddddd",
"dddddddd",
"11111111",
"12111111",
8,
testURL,
},
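The updated expectations encode the new attempt numbering: with no previous state, the initial down check now triggers a retry (attempt 2), after which the counter resets, so "du"/"11" becomes "duu"/"121" and the always-down case becomes "12111111". The following is a rough, hypothetical helper (not part of the test suite) that derives those attempt strings from a status sequence, reusing shouldRetry and the Status constants from the sketch above.

// attemptSeq applies the retry rule check by check: on a retry the state is
// not yet published and the attempt counter increments; on a final attempt
// the observed status becomes the new state and the counter resets to 1.
func attemptSeq(statuses string, maxAttempts int) string {
	last := StatusEmpty // a fresh monitor has no previous state
	attempt := 1
	out := ""
	for _, c := range statuses {
		out += fmt.Sprintf("%d", attempt)
		cur := StatusUp
		if c == 'd' {
			cur = StatusDown
		}
		if shouldRetry(cur, last, attempt, maxAttempts) {
			attempt++
			continue
		}
		last = cur
		attempt = 1
	}
	return out
}

// attemptSeq("duu", 2)      == "121"
// attemptSeq("dddddddd", 2) == "12111111"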
heartbeat/monitors/wrappers/wrappers_test.go (0 additions, 128 deletions)
@@ -43,8 +43,6 @@ import (
"github.com/elastic/beats/v7/heartbeat/monitors/jobs"
"github.com/elastic/beats/v7/heartbeat/monitors/logger"
"github.com/elastic/beats/v7/heartbeat/monitors/stdfields"
"github.com/elastic/beats/v7/heartbeat/monitors/wrappers/monitorstate"
"github.com/elastic/beats/v7/heartbeat/monitors/wrappers/summarizer/jobsummary"
"github.com/elastic/beats/v7/heartbeat/monitors/wrappers/summarizer/summarizertesthelper"
"github.com/elastic/beats/v7/heartbeat/monitors/wrappers/wraputil"
"github.com/elastic/beats/v7/heartbeat/scheduler/schedule"
@@ -352,132 +350,6 @@ func TestMultiJobConts(t *testing.T) {
})
}

// TestRetryMultiCont is of somewhat dubious utility at the moment,
// it mostly tests that we __don't__ retry on an initial down.
// retry logic is better and more completely tested in the summarizer
// and scenario tests.
func TestRetryMultiCont(t *testing.T) {
uniqScope := isdef.ScopedIsUnique()

expected := []struct {
monStatus string
js jobsummary.JobSummary
state monitorstate.State
}{
{
"down",
jobsummary.JobSummary{
Status: "down",
FinalAttempt: true,
// we expect two up since this is a lightweight
// job and all events get a monitor status
// since no errors are returned that's 2
Up: 0,
Down: 2,
Attempt: 1,
MaxAttempts: 2,
},
monitorstate.State{
Status: "down",
Up: 0,
Down: 2,
Checks: 2,
},
},
{
"down",
jobsummary.JobSummary{
Status: "down",
FinalAttempt: true,
Up: 0,
Down: 2,
Attempt: 2,
MaxAttempts: 2,
},
monitorstate.State{
Status: "down",
Up: 0,
Down: 2,
Checks: 2,
},
},
}

jobErr := fmt.Errorf("down")

makeContJob := func(t *testing.T, u string) jobs.Job {
expIdx := 0
return func(event *beat.Event) ([]jobs.Job, error) {
eventext.MergeEventFields(event, mapstr.M{"cont": "1st"})
u, err := url.Parse(u)
require.NoError(t, err)
eventext.MergeEventFields(event, mapstr.M{"url": wraputil.URLFields(u)})

return []jobs.Job{
func(event *beat.Event) ([]jobs.Job, error) {
eventext.MergeEventFields(event, mapstr.M{"cont": "2nd"})
eventext.MergeEventFields(event, mapstr.M{"url": wraputil.URLFields(u)})

expIdx++
if expIdx >= len(expected)-1 {
expIdx = 0
}
exp := expected[expIdx]
if exp.js.Status == "down" {
return nil, jobErr
}

return nil, nil
},
}, jobErr
}
}

contJobValidator := func(u string, msg string) validator.Validator {
return lookslike.Compose(
urlValidator(t, u),
hbtestllext.MaybeHasEventType,
lookslike.MustCompile(map[string]interface{}{"cont": msg}),
lookslike.MustCompile(map[string]interface{}{
"error": map[string]interface{}{
"message": isdef.IsString,
"type": isdef.IsString,
},
"monitor": map[string]interface{}{
"id": uniqScope.IsUniqueTo(u),
"name": testMonFields.Name,
"type": testMonFields.Type,
"status": "down",
"check_group": uniqScope.IsUniqueTo(u),
},
"state": isdef.Optional(hbtestllext.IsMonitorState),
}),
hbtestllext.MonitorTimespanValidator,
)
}

retryMonFields := testMonFields
retryMonFields.MaxAttempts = 2

for _, expected := range expected {
testCommonWrap(t, testDef{
"multi-job-continuations-retry",
retryMonFields,
[]jobs.Job{makeContJob(t, "http://foo.com")},
[]validator.Validator{
contJobValidator("http://foo.com", "1st"),
lookslike.Compose(
contJobValidator("http://foo.com", "2nd"),
summarizertesthelper.SummaryValidator(expected.js.Up, expected.js.Down),
hbtestllext.MaybeHasDuration,
),
},
nil,
nil,
})
}
}

func TestMultiJobContsCancelledEvents(t *testing.T) {
uniqScope := isdef.ScopedIsUnique()
