From df4d550512a5fbe71e685c5b4baf7e25b0457702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20Alvarez=20Pi=C3=B1eiro?= <95703246+emilioalvap@users.noreply.github.com> Date: Fri, 13 Oct 2023 21:02:37 +0200 Subject: [PATCH] [Heartbeat] Retry on no previous state (#36842) * Add empty state to retry cond * Remove unwanted test * Add changelog * address review --------- Co-authored-by: vigneshshanmugam --- CHANGELOG.next.asciidoc | 1 + .../wrappers/summarizer/plugstatestat.go | 6 +- .../wrappers/summarizer/summarizer_test.go | 10 +- heartbeat/monitors/wrappers/wrappers_test.go | 128 ------------------ 4 files changed, 9 insertions(+), 136 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 8ba973c8c63c..c4ec867eaf39 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -114,6 +114,7 @@ is collected by it. *Heartbeat* - Fix panics when parsing dereferencing invalid parsed url. {pull}34702[34702] +- Fix retries to trigger on a down monitor with no previous state. {pull}36842[36842] *Metricbeat* diff --git a/heartbeat/monitors/wrappers/summarizer/plugstatestat.go b/heartbeat/monitors/wrappers/summarizer/plugstatestat.go index 9567a666ad6d..4acfee4dc361 100644 --- a/heartbeat/monitors/wrappers/summarizer/plugstatestat.go +++ b/heartbeat/monitors/wrappers/summarizer/plugstatestat.go @@ -150,12 +150,12 @@ func (ssp *commonSSP) BeforeSummary(event *beat.Event) BeforeSummaryActions { lastStatus := ssp.stateTracker.GetCurrentStatus(ssp.sf) curCheckDown := ssp.js.Status == monitorstate.StatusDown - lastStateUp := ssp.stateTracker.GetCurrentStatus(ssp.sf) == monitorstate.StatusUp + lastStateUpOrEmpty := lastStatus == monitorstate.StatusUp || lastStatus == monitorstate.StatusEmpty hasAttemptsRemaining := ssp.js.Attempt < ssp.js.MaxAttempts // retry if... retry := curCheckDown && // the current check is down - lastStateUp && // we were previously up, if we were previously down we just check once + lastStateUpOrEmpty && // we were previously up or had no previous state, if we were previously down we just check once hasAttemptsRemaining // and we are configured to actually make multiple attempts // if we aren't retrying this is the final attempt ssp.js.FinalAttempt = !retry @@ -174,7 +174,7 @@ func (ssp *commonSSP) BeforeSummary(event *beat.Event) BeforeSummaryActions { eventext.MergeEventFields(event, fields) - logp.L().Debugf("attempt info: current(%v) == lastStatus(%v) && attempts(%d < %d)", ssp.js.Status, lastStatus, ssp.js.Attempt, ssp.js.MaxAttempts) + logp.L().Infof("attempt info: current(%v) == lastStatus(%v) && attempts(%d < %d)", ssp.js.Status, lastStatus, ssp.js.Attempt, ssp.js.MaxAttempts) if retry { return RetryBeforeSummary diff --git a/heartbeat/monitors/wrappers/summarizer/summarizer_test.go b/heartbeat/monitors/wrappers/summarizer/summarizer_test.go index 63de2da71a81..2a94b3e6f596 100644 --- a/heartbeat/monitors/wrappers/summarizer/summarizer_test.go +++ b/heartbeat/monitors/wrappers/summarizer/summarizer_test.go @@ -60,10 +60,10 @@ func TestSummarizer(t *testing.T) { { "start down, transition to up", 2, - "du", - "du", - "11", - 2, + "duu", + "duu", + "121", + 3, testURL, }, { @@ -80,7 +80,7 @@ func TestSummarizer(t *testing.T) { 2, "dddddddd", "dddddddd", - "11111111", + "12111111", 8, testURL, }, diff --git a/heartbeat/monitors/wrappers/wrappers_test.go b/heartbeat/monitors/wrappers/wrappers_test.go index 26a54f9fc36f..eb4565334342 100644 --- a/heartbeat/monitors/wrappers/wrappers_test.go +++ b/heartbeat/monitors/wrappers/wrappers_test.go @@ -43,8 +43,6 @@ import ( "github.com/elastic/beats/v7/heartbeat/monitors/jobs" "github.com/elastic/beats/v7/heartbeat/monitors/logger" "github.com/elastic/beats/v7/heartbeat/monitors/stdfields" - "github.com/elastic/beats/v7/heartbeat/monitors/wrappers/monitorstate" - "github.com/elastic/beats/v7/heartbeat/monitors/wrappers/summarizer/jobsummary" "github.com/elastic/beats/v7/heartbeat/monitors/wrappers/summarizer/summarizertesthelper" "github.com/elastic/beats/v7/heartbeat/monitors/wrappers/wraputil" "github.com/elastic/beats/v7/heartbeat/scheduler/schedule" @@ -352,132 +350,6 @@ func TestMultiJobConts(t *testing.T) { }) } -// TestRetryMultiCont is of somewhat dubious utility at the moment, -// it mostly tests that we __don't__ retry on an initial down. -// retry logic is better and more completely tested in the summarizer -// and scenario tests. -func TestRetryMultiCont(t *testing.T) { - uniqScope := isdef.ScopedIsUnique() - - expected := []struct { - monStatus string - js jobsummary.JobSummary - state monitorstate.State - }{ - { - "down", - jobsummary.JobSummary{ - Status: "down", - FinalAttempt: true, - // we expect two up since this is a lightweight - // job and all events get a monitor status - // since no errors are returned that's 2 - Up: 0, - Down: 2, - Attempt: 1, - MaxAttempts: 2, - }, - monitorstate.State{ - Status: "down", - Up: 0, - Down: 2, - Checks: 2, - }, - }, - { - "down", - jobsummary.JobSummary{ - Status: "down", - FinalAttempt: true, - Up: 0, - Down: 2, - Attempt: 2, - MaxAttempts: 2, - }, - monitorstate.State{ - Status: "down", - Up: 0, - Down: 2, - Checks: 2, - }, - }, - } - - jobErr := fmt.Errorf("down") - - makeContJob := func(t *testing.T, u string) jobs.Job { - expIdx := 0 - return func(event *beat.Event) ([]jobs.Job, error) { - eventext.MergeEventFields(event, mapstr.M{"cont": "1st"}) - u, err := url.Parse(u) - require.NoError(t, err) - eventext.MergeEventFields(event, mapstr.M{"url": wraputil.URLFields(u)}) - - return []jobs.Job{ - func(event *beat.Event) ([]jobs.Job, error) { - eventext.MergeEventFields(event, mapstr.M{"cont": "2nd"}) - eventext.MergeEventFields(event, mapstr.M{"url": wraputil.URLFields(u)}) - - expIdx++ - if expIdx >= len(expected)-1 { - expIdx = 0 - } - exp := expected[expIdx] - if exp.js.Status == "down" { - return nil, jobErr - } - - return nil, nil - }, - }, jobErr - } - } - - contJobValidator := func(u string, msg string) validator.Validator { - return lookslike.Compose( - urlValidator(t, u), - hbtestllext.MaybeHasEventType, - lookslike.MustCompile(map[string]interface{}{"cont": msg}), - lookslike.MustCompile(map[string]interface{}{ - "error": map[string]interface{}{ - "message": isdef.IsString, - "type": isdef.IsString, - }, - "monitor": map[string]interface{}{ - "id": uniqScope.IsUniqueTo(u), - "name": testMonFields.Name, - "type": testMonFields.Type, - "status": "down", - "check_group": uniqScope.IsUniqueTo(u), - }, - "state": isdef.Optional(hbtestllext.IsMonitorState), - }), - hbtestllext.MonitorTimespanValidator, - ) - } - - retryMonFields := testMonFields - retryMonFields.MaxAttempts = 2 - - for _, expected := range expected { - testCommonWrap(t, testDef{ - "multi-job-continuations-retry", - retryMonFields, - []jobs.Job{makeContJob(t, "http://foo.com")}, - []validator.Validator{ - contJobValidator("http://foo.com", "1st"), - lookslike.Compose( - contJobValidator("http://foo.com", "2nd"), - summarizertesthelper.SummaryValidator(expected.js.Up, expected.js.Down), - hbtestllext.MaybeHasDuration, - ), - }, - nil, - nil, - }) - } -} - func TestMultiJobContsCancelledEvents(t *testing.T) { uniqScope := isdef.ScopedIsUnique()