From f84c05b5774a83fbf3d8caa03758352ee2293f80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Paolo=20Chil=C3=A0?= <paolo.chila@elastic.co>
Date: Tue, 19 Nov 2024 14:32:05 +0100
Subject: [PATCH] Metricbeat: add configurable failure threshold before
 reporting streams as degraded (#41570)

* Metricbeat: add configurable failure threshold before reporting streams as degraded

With this change it is possible to configure a threshold for the number of consecutive errors that may happen while fetching metrics for a given stream before the stream gets marked as DEGRADED.
To configure such a threshold, add "failure_threshold": <n> to a module configuration block.
Depending on the value of <n> the threshold will be configured in different ways:

    n == 0: DEGRADED reporting for the stream is disabled; the stream will never become DEGRADED no matter how many errors are encountered while fetching metrics
    n == 1 or failure_threshold not specified: backward compatible behavior, the stream will become DEGRADED at the first error encountered
    n > 1: stream will become DEGRADED after at least n consecutive errors have been encountered

When a fetch operation completes without errors (or returns only a partial-metrics error) the consecutive errors counter is reset and the stream is set to HEALTHY.
---
 metricbeat/mb/module/wrapper.go               | 113 ++--
 metricbeat/mb/module/wrapper_internal_test.go | 567 ++++++++++++++++++
 2 files changed, 636 insertions(+), 44 deletions(-)
 create mode 100644 metricbeat/mb/module/wrapper_internal_test.go
diff --git a/metricbeat/mb/module/wrapper.go b/metricbeat/mb/module/wrapper.go
index 95185817f5fb..4681976f2e14 100644
--- a/metricbeat/mb/module/wrapper.go
+++ b/metricbeat/mb/module/wrapper.go
@@ -36,11 +36,15 @@ import (
 	"github.com/elastic/elastic-agent-libs/testing"
 )
 
-// Expvar metric names.
 const (
-	successesKey = "success"
-	failuresKey  = "failures"
-	eventsKey    = "events"
+	// Expvar metric names.
+	successesKey           = "success"
+	failuresKey            = "failures"
+	eventsKey              = "events"
+	consecutiveFailuresKey = "consecutive_failures"
+
+	// Failure threshold config key; keep in sync with the `config` struct tag unpacked in createWrapper.
+	failureThresholdKey = "failure_threshold"
 )
 
 var (
@@ -70,16 +74,18 @@ type metricSetWrapper struct {
 	module *Wrapper // Parent Module.
 	stats  *stats   // stats for this MetricSet.
 
-	periodic bool // Set to true if this metricset is a periodic fetcher
+	periodic         bool // Set to true if this metricset is a periodic fetcher
+	failureThreshold uint // threshold of consecutive errors needed to set the stream as degraded
 }
 
 // stats bundles common metricset stats.
 type stats struct {
-	key      string          // full stats key
-	ref      uint32          // number of modules/metricsets reusing stats instance
-	success  *monitoring.Int // Total success events.
-	failures *monitoring.Int // Total error events.
-	events   *monitoring.Int // Total events published.
+	key                 string           // full stats key
+	ref                 uint32           // number of modules/metricsets reusing stats instance
+	success             *monitoring.Int  // Total success events.
+	failures            *monitoring.Int  // Total error events.
+	events              *monitoring.Int  // Total events published.
+	consecutiveFailures *monitoring.Uint // Consecutive failures fetching this metricset; reset to 0 by a successful or partial-metrics fetch.
 }
 
 // NewWrapper creates a new module and its associated metricsets based on the given configuration.
@@ -106,11 +112,28 @@ func createWrapper(module mb.Module, metricSets []mb.MetricSet, options ...Optio
 		applyOption(wrapper)
 	}
 
+	failureThreshold := uint(1)
+
+	var streamHealthSettings struct {
+		FailureThreshold *uint `config:"failure_threshold"`
+	}
+
+	err := module.UnpackConfig(&streamHealthSettings)
+
+	if err != nil {
+		return nil, fmt.Errorf("unpacking raw config: %w", err)
+	}
+
+	if streamHealthSettings.FailureThreshold != nil {
+		failureThreshold = *streamHealthSettings.FailureThreshold
+	}
+
 	for i, metricSet := range metricSets {
 		wrapper.metricSets[i] = &metricSetWrapper{
-			MetricSet: metricSet,
-			module:    wrapper,
-			stats:     getMetricSetStats(wrapper.Name(), metricSet.Name()),
+			MetricSet:        metricSet,
+			module:           wrapper,
+			stats:            getMetricSetStats(wrapper.Name(), metricSet.Name()),
+			failureThreshold: failureThreshold,
 		}
 	}
 	return wrapper, nil
@@ -254,35 +277,11 @@ func (msw *metricSetWrapper) fetch(ctx context.Context, reporter reporter) {
 	case mb.ReportingMetricSetV2Error:
 		reporter.StartFetchTimer()
 		err := fetcher.Fetch(reporter.V2())
-		if err != nil {
-			reporter.V2().Error(err)
-			if errors.As(err, &mb.PartialMetricsError{}) {
-				// mark module as running if metrics are partially available and display the error message
-				msw.module.UpdateStatus(status.Running, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
-			} else {
-				// mark it as degraded for any other issue encountered
-				msw.module.UpdateStatus(status.Degraded, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
-			}
-			logp.Err("Error fetching data for metricset %s.%s: %s", msw.module.Name(), msw.Name(), err)
-		} else {
-			msw.module.UpdateStatus(status.Running, "")
-		}
+		msw.handleFetchError(err, reporter.V2())
 	case mb.ReportingMetricSetV2WithContext:
 		reporter.StartFetchTimer()
 		err := fetcher.Fetch(ctx, reporter.V2())
-		if err != nil {
-			reporter.V2().Error(err)
-			if errors.As(err, &mb.PartialMetricsError{}) {
-				// mark module as running if metrics are partially available and display the error message
-				msw.module.UpdateStatus(status.Running, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
-			} else {
-				// mark it as degraded for any other issue encountered
-				msw.module.UpdateStatus(status.Degraded, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
-			}
-			logp.Err("Error fetching data for metricset %s.%s: %s", msw.module.Name(), msw.Name(), err)
-		} else {
-			msw.module.UpdateStatus(status.Running, "")
-		}
+		msw.handleFetchError(err, reporter.V2())
 	default:
 		panic(fmt.Sprintf("unexpected fetcher type for %v", msw))
 	}
@@ -311,6 +310,31 @@ func (msw *metricSetWrapper) Test(d testing.Driver) {
 	})
 }
 
+// handleFetchError reports err (if any) and updates the consecutive-failures
+// counter and stream status: nil or partial errors reset the counter and set
+// Running; other errors set Degraded at failureThreshold failures (0 = never).
+func (msw *metricSetWrapper) handleFetchError(err error, reporter mb.PushReporterV2) {
+	switch {
+	case err == nil:
+		msw.stats.consecutiveFailures.Set(0)
+		msw.module.UpdateStatus(status.Running, "")
+	case errors.As(err, &mb.PartialMetricsError{}):
+		reporter.Error(err)
+		msw.stats.consecutiveFailures.Set(0)
+		// mark module as running if metrics are partially available and display the error message
+		msw.module.UpdateStatus(status.Running, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
+		logp.Err("Error fetching data for metricset %s.%s: %s", msw.module.Name(), msw.Name(), err)
+	default:
+		reporter.Error(err)
+		msw.stats.consecutiveFailures.Inc()
+		// compare in uint64 so the counter is never truncated on 32-bit platforms
+		if msw.failureThreshold > 0 && msw.stats.consecutiveFailures.Get() >= uint64(msw.failureThreshold) {
+			// mark it as degraded once the configured number of consecutive failures is reached
+			msw.module.UpdateStatus(status.Degraded, fmt.Sprintf("Error fetching data for metricset %s.%s: %v", msw.module.Name(), msw.MetricSet.Name(), err))
+		}
+		logp.Err("Error fetching data for metricset %s.%s: %s", msw.module.Name(), msw.Name(), err)
+	}
+}
+
 type reporter interface {
 	StartFetchTimer()
 	V1() mb.PushReporter //nolint:staticcheck // PushReporter is deprecated but not removed
@@ -437,11 +461,12 @@ func getMetricSetStats(module, name string) *stats {
 
 	reg := monitoring.Default.NewRegistry(key)
 	s := &stats{
-		key:      key,
-		ref:      1,
-		success:  monitoring.NewInt(reg, successesKey),
-		failures: monitoring.NewInt(reg, failuresKey),
-		events:   monitoring.NewInt(reg, eventsKey),
+		key:                 key,
+		ref:                 1,
+		success:             monitoring.NewInt(reg, successesKey),
+		failures:            monitoring.NewInt(reg, failuresKey),
+		events:              monitoring.NewInt(reg, eventsKey),
+		consecutiveFailures: monitoring.NewUint(reg, consecutiveFailuresKey),
 	}
 
 	fetches[key] = s
diff --git a/metricbeat/mb/module/wrapper_internal_test.go b/metricbeat/mb/module/wrapper_internal_test.go
new file mode 100644
index 000000000000..a9b242e55e26
--- /dev/null
+++ b/metricbeat/mb/module/wrapper_internal_test.go
@@ -0,0 +1,567 @@
+// Licensed to Elasticsearch B.V. under one or more contributor
+// license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright
+// ownership. Elasticsearch B.V. licenses this file to you under
+// the Apache License, Version 2.0 (the "License"); you may
+// not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package module
+
+import (
+	"context"
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/mock"
+	"github.com/stretchr/testify/require"
+
+	"github.com/elastic/beats/v7/libbeat/management/status"
+	"github.com/elastic/beats/v7/metricbeat/mb"
+	conf "github.com/elastic/elastic-agent-libs/config"
+)
+
+const mockModuleName = "MockModule"
+const mockMetricSetName = "MockMetricSet"
+
+// mockReportingFetcher is a testify mock of a metricset exposing Fetch(mb.ReporterV2) error.
+type mockReportingFetcher struct {
+	mb.BaseMetricSet
+	mock.Mock
+}
+
+func (mrf *mockReportingFetcher) Fetch(r mb.ReporterV2) error {
+	// Record the call and hand back the error configured on the mock.
+	return mrf.Called(r).Error(0)
+}
+
+// mockReportingFetcherWithContext is a testify mock of a metricset exposing Fetch(context.Context, mb.ReporterV2) error.
+type mockReportingFetcherWithContext struct {
+	mb.BaseMetricSet
+	mock.Mock
+}
+
+func (mrf *mockReportingFetcherWithContext) Fetch(ctx context.Context, r mb.ReporterV2) error {
+	// Record the call and hand back the error configured on the mock.
+	return mrf.Called(ctx, r).Error(0)
+}
+
+// mockReporter is a testify mock providing StartFetchTimer, V1 and V2.
+type mockReporter struct {
+	mock.Mock
+}
+
+func (mr *mockReporter) StartFetchTimer() {
+	mr.Called()
+}
+
+func (mr *mockReporter) V1() mb.PushReporter { //nolint:staticcheck // PushReporter is deprecated but not removed
+	// The unchecked type assertion panics if the configured return value has a different type.
+	return mr.Called().Get(0).(mb.PushReporter) //nolint:staticcheck // PushReporter is deprecated but not removed
+}
+
+func (mr *mockReporter) V2() mb.PushReporterV2 {
+	// The unchecked type assertion panics if the configured return value has a different type.
+	return mr.Called().Get(0).(mb.PushReporterV2)
+}
+
+// mockPushReporterV2 is a testify mock providing Event, Error and Done.
+type mockPushReporterV2 struct {
+	mock.Mock
+}
+
+func (mpr *mockPushReporterV2) Event(event mb.Event) bool {
+	// Record the event and return the boolean configured on the mock.
+	return mpr.Called(event).Bool(0)
+}
+
+func (mpr *mockPushReporterV2) Error(err error) bool {
+	// Record the error and return the boolean configured on the mock.
+	return mpr.Called(err).Bool(0)
+}
+
+func (mpr *mockPushReporterV2) Done() <-chan struct{} {
+	// The unchecked type assertion panics if the configured return value has a different type.
+	return mpr.Called().Get(0).(<-chan struct{})
+}
+
+// mockStatusReporter is a testify mock that records UpdateStatus calls.
+type mockStatusReporter struct {
+	mock.Mock
+}
+
+func (m *mockStatusReporter) UpdateStatus(status status.Status, msg string) {
+	m.Called(status, msg)
+}
+
+// TestWrapperHandleFetchErrorSync runs fetch synchronously against mocks and checks status updates for various failure_threshold values.
+func TestWrapperHandleFetchErrorSync(t *testing.T) {
+	fetchError := errors.New("fetch has gone all wrong")
+
+	t.Run("ReportingMetricSetV2Error", func(t *testing.T) {
+		type setupFunc func(t *testing.T, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter)
+		type postIterationAssertFunc func(t *testing.T, i int, msWrapper *metricSetWrapper, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter)
+
+		testcases := []struct {
+			name            string
+			config          *conf.C
+			setup           setupFunc
+			iterations      int
+			assertIteration postIterationAssertFunc
+		}{
+			{
+				name: "no failure_threshold: status DEGRADED after first error",
+				config: newConfig(t, map[string]interface{}{
+					"module":     mockModuleName,
+					"metricsets": []string{mockMetricSetName},
+					"period":     "100ms",
+					"hosts":      []string{"testhost"},
+				}),
+				setup: func(t *testing.T, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					// fetcher will immediately error out
+					fetcher.On("Fetch", pushReporter).Return(fetchError).Once()
+
+					// expect the error to be propagated via the pushReporter
+					pushReporter.On("Error", fetchError).Return(true).Once()
+					// expect the status degraded to be set
+					statusReporter.On("UpdateStatus", status.Degraded, mock.AnythingOfType("string")).Once()
+				},
+				iterations:      1,
+				assertIteration: nil,
+			},
+			{
+				name: "no failure_threshold: status DEGRADED after first error, reset to Running after first successful fetch",
+				config: newConfig(t, map[string]interface{}{
+					"module":     mockModuleName,
+					"metricsets": []string{mockMetricSetName},
+					"period":     "100ms",
+					"hosts":      []string{"testhost"},
+				}),
+				setup: func(t *testing.T, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					// fetcher will immediately error out 3 times
+					fetcher.On("Fetch", pushReporter).Return(fetchError).Times(3)
+					// fetcher will never error again afterwards
+					fetcher.On("Fetch", pushReporter).Return(nil)
+					// expect the error to be propagated via the pushReporter
+					pushReporter.On("Error", fetchError).Return(true).Times(3)
+					// expect the status degraded to be set 3 times
+					statusReporter.On("UpdateStatus", status.Degraded, mock.AnythingOfType("string")).Times(3)
+					// expect the status Running to be set on each of the 2 successful fetches that follow
+					statusReporter.On("UpdateStatus", status.Running, mock.AnythingOfType("string")).Twice()
+				},
+				iterations: 5,
+				assertIteration: func(t *testing.T, i int, msWrapper *metricSetWrapper, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					t.Logf("Assertion after iteration %d", i)
+					switch {
+					case i < 3:
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream not degraded at iteration %d", i)
+					case i >= 3:
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Running, mock.AnythingOfType("string")), "stream not set to running at iteration %d", i)
+					}
+				},
+			},
+			{
+				name: "failure_threshold = 3: status DEGRADED at the 3rd error",
+				config: newConfig(t, map[string]interface{}{
+					"module":            mockModuleName,
+					"metricsets":        []string{mockMetricSetName},
+					"period":            "100ms",
+					"hosts":             []string{"testhost"},
+					failureThresholdKey: 3,
+				}),
+				setup: func(t *testing.T, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					// fetcher will immediately error out 3 times in a row
+					fetcher.On("Fetch", pushReporter).Return(fetchError).Times(3)
+					// expect the error to be propagated via the pushReporter at every iteration
+					pushReporter.On("Error", fetchError).Return(true).Times(3)
+					// expect the status degraded to be set
+					statusReporter.On("UpdateStatus", status.Degraded, mock.AnythingOfType("string")).Once()
+				},
+				iterations: 3,
+				assertIteration: func(t *testing.T, i int, msWrapper *metricSetWrapper, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					t.Logf("Assertion after iteration %d", i)
+					switch {
+					case i < 2:
+						assert.Truef(t, statusReporter.AssertNotCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream degraded at iteration %d", i)
+					case i == 2:
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream not yet degraded at iteration %d", i)
+					}
+				},
+			},
+			{
+				name: "failure_threshold = 3: status HEALTHY after 2 errors, 1 success and 2 more errors, DEGRADED at the 3rd consecutive error",
+				config: newConfig(t, map[string]interface{}{
+					"module":            mockModuleName,
+					"metricsets":        []string{mockMetricSetName},
+					"period":            "100ms",
+					"hosts":             []string{"testhost"},
+					failureThresholdKey: 3,
+				}),
+				setup: func(t *testing.T, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					// fetcher will error out 2 times in a row
+					fetcher.On("Fetch", pushReporter).Return(fetchError).Times(2)
+					// fetcher will then succeed once
+					fetcher.On("Fetch", pushReporter).Return(nil).Once()
+					// fetcher will error out 3 more times in a row
+					fetcher.On("Fetch", pushReporter).Return(fetchError).Times(3)
+
+					// expect the error to be propagated via the pushReporter at every failing iteration
+					pushReporter.On("Error", fetchError).Return(true).Times(5)
+					// expect the status running to be set when there's no error returned by the fetcher at the 3rd iteration
+					statusReporter.On("UpdateStatus", status.Running, mock.AnythingOfType("string")).Once()
+					// expect the status degraded to be set only once
+					statusReporter.On("UpdateStatus", status.Degraded, mock.AnythingOfType("string")).Once()
+				},
+				iterations: 6,
+				assertIteration: func(t *testing.T, i int, msWrapper *metricSetWrapper, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					t.Logf("Assertion after iteration %d", i)
+					switch {
+					case i < 2:
+						assert.Truef(t, statusReporter.AssertNotCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream degraded at iteration %d", i)
+					case i >= 2 && i < 5:
+						assert.Truef(t, statusReporter.AssertNotCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream degraded at iteration %d", i)
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Running, mock.AnythingOfType("string")), "stream not set to running at iteration %d", i)
+					case i == 5:
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream not yet degraded at iteration %d", i)
+					}
+				},
+			},
+			{
+				name: "failure_threshold = 0: stream status update never become DEGRADED",
+				config: newConfig(t, map[string]interface{}{
+					"module":            mockModuleName,
+					"metricsets":        []string{mockMetricSetName},
+					"period":            "100ms",
+					"hosts":             []string{"testhost"},
+					failureThresholdKey: 0,
+				}),
+				setup: func(t *testing.T, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					// fetcher will error out 9 times in a row
+					fetcher.On("Fetch", pushReporter).Return(fetchError).Times(9)
+					// fetcher will then succeed once
+					fetcher.On("Fetch", pushReporter).Return(nil).Once()
+
+					// expect the error to be propagated via the pushReporter at every failing iteration
+					pushReporter.On("Error", fetchError).Return(true).Times(9)
+					// expect the status running to be set when there's no error returned by the fetcher at the 10th iteration
+					statusReporter.On("UpdateStatus", status.Running, mock.AnythingOfType("string")).Once()
+				},
+				iterations: 10,
+				assertIteration: func(t *testing.T, i int, msWrapper *metricSetWrapper, fetcher *mockReportingFetcher, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					t.Logf("Assertion after iteration %d", i)
+					switch {
+					case i < 9:
+						assert.Truef(t, statusReporter.AssertNotCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream degraded at iteration %d", i)
+					case i == 9:
+						assert.Truef(t, statusReporter.AssertNotCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream degraded at iteration %d", i)
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Running, mock.AnythingOfType("string")), "stream not set to running at iteration %d", i)
+					}
+				},
+			},
+		}
+
+		for _, tc := range testcases {
+			t.Run(tc.name, func(t *testing.T) {
+				// Setup mock push reporter
+				mpr := new(mockPushReporterV2)
+
+				// Setup mock fetcher
+				mrf := new(mockReportingFetcher)
+
+				// Setup mock StatusReporter
+				msr := new(mockStatusReporter)
+
+				// Setup mock reporter (ensure proper handling of intermediate calls, no functional value here)
+				mr := new(mockReporter)
+				mr.On("StartFetchTimer").Return()
+				mr.On("V2").Return(mpr)
+
+				// assert mocks expectations
+				t.Cleanup(func() {
+					mock.AssertExpectationsForObjects(t, mrf, mr, mpr, msr)
+				})
+
+				// setup mocks before starting the test
+				if tc.setup != nil {
+					tc.setup(t, mrf, mpr, msr)
+				}
+
+				// add metricset in registry
+				r := mb.NewRegister()
+				err := r.AddMetricSet(mockModuleName, mockMetricSetName, func(base mb.BaseMetricSet) (mb.MetricSet, error) {
+					mrf.BaseMetricSet = base
+					return mrf, nil
+				})
+				require.NoError(t, err)
+
+				aModule, metricSets, err := mb.NewModule(tc.config, r)
+				require.NoError(t, err)
+
+				// Set the mock status reporter
+				aModule.SetStatusReporter(msr)
+
+				moduleWrapper, err := NewWrapperForMetricSet(aModule, metricSets[0], WithMetricSetInfo())
+				require.NoError(t, err)
+
+				// run metricset synchronously
+				wrappedMetricSet := moduleWrapper.MetricSets()[0]
+
+				t.Cleanup(func() {
+					// release stats structure across testcases
+					releaseStats(wrappedMetricSet.stats)
+				})
+
+				for i := 0; i < tc.iterations; i++ {
+					wrappedMetricSet.fetch(context.TODO(), mr)
+					if tc.assertIteration != nil {
+						tc.assertIteration(t, i, wrappedMetricSet, mrf, mpr, msr)
+					}
+				}
+			})
+		}
+	})
+
+	t.Run("ReportingMetricSetV2WithContext", func(t *testing.T) {
+		// These tests are the same as ReportingMetricSetV2Error, duplicated here because the generic solution to specify
+		// testcases only once is awkward and not very readable
+
+		type setupFunc func(t *testing.T, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter)
+		type postIterationAssertFunc func(t *testing.T, i int, msWrapper *metricSetWrapper, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter)
+
+		fetchCtx := context.TODO()
+
+		testcases := []struct {
+			name            string
+			config          *conf.C
+			setup           setupFunc
+			iterations      int
+			assertIteration postIterationAssertFunc
+		}{
+			{
+				name: "no failure_threshold: status DEGRADED after first error",
+				config: newConfig(t, map[string]interface{}{
+					"module":     mockModuleName,
+					"metricsets": []string{mockMetricSetName},
+					"period":     "100ms",
+					"hosts":      []string{"testhost"},
+				}),
+				setup: func(t *testing.T, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					// fetcher will immediately error out
+					fetcher.On("Fetch", fetchCtx, pushReporter).Return(fetchError).Once()
+
+					// expect the error to be propagated via the pushReporter
+					pushReporter.On("Error", fetchError).Return(true).Once()
+					// expect the status degraded to be set
+					statusReporter.On("UpdateStatus", status.Degraded, mock.AnythingOfType("string")).Once()
+				},
+				iterations:      1,
+				assertIteration: nil,
+			},
+			{
+				name: "no failure_threshold: status DEGRADED after first error, reset to Running after first successful fetch",
+				config: newConfig(t, map[string]interface{}{
+					"module":     mockModuleName,
+					"metricsets": []string{mockMetricSetName},
+					"period":     "100ms",
+					"hosts":      []string{"testhost"},
+				}),
+				setup: func(t *testing.T, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					// fetcher will immediately error out 3 times
+					fetcher.On("Fetch", fetchCtx, pushReporter).Return(fetchError).Times(3)
+					// fetcher will never error again afterwards
+					fetcher.On("Fetch", fetchCtx, pushReporter).Return(nil)
+					// expect the error to be propagated via the pushReporter
+					pushReporter.On("Error", fetchError).Return(true).Times(3)
+					// expect the status degraded to be set 3 times
+					statusReporter.On("UpdateStatus", status.Degraded, mock.AnythingOfType("string")).Times(3)
+					// expect the status Running to be set on each of the 2 successful fetches that follow
+					statusReporter.On("UpdateStatus", status.Running, mock.AnythingOfType("string")).Twice()
+				},
+				iterations: 5,
+				assertIteration: func(t *testing.T, i int, msWrapper *metricSetWrapper, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					t.Logf("Assertion after iteration %d", i)
+					switch {
+					case i < 3:
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream not degraded at iteration %d", i)
+					case i >= 3:
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Running, mock.AnythingOfType("string")), "stream not set to running at iteration %d", i)
+					}
+				},
+			},
+			{
+				name: "failure_threshold = 3: status DEGRADED at the 3rd error",
+				config: newConfig(t, map[string]interface{}{
+					"module":            mockModuleName,
+					"metricsets":        []string{mockMetricSetName},
+					"period":            "100ms",
+					"hosts":             []string{"testhost"},
+					failureThresholdKey: 3,
+				}),
+				setup: func(t *testing.T, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					// fetcher will immediately error out 3 times in a row
+					fetcher.On("Fetch", fetchCtx, pushReporter).Return(fetchError).Times(3)
+					// expect the error to be propagated via the pushReporter at every iteration
+					pushReporter.On("Error", fetchError).Return(true).Times(3)
+					// expect the status degraded to be set
+					statusReporter.On("UpdateStatus", status.Degraded, mock.AnythingOfType("string")).Once()
+				},
+				iterations: 3,
+				assertIteration: func(t *testing.T, i int, msWrapper *metricSetWrapper, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					t.Logf("Assertion after iteration %d", i)
+					switch {
+					case i < 2:
+						assert.Truef(t, statusReporter.AssertNotCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream degraded at iteration %d", i)
+					case i == 2:
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream not yet degraded at iteration %d", i)
+					}
+				},
+			},
+			{
+				name: "failure_threshold = 3: status HEALTHY after 2 errors, 1 success and 2 more errors, DEGRADED at the 3rd consecutive error",
+				config: newConfig(t, map[string]interface{}{
+					"module":            mockModuleName,
+					"metricsets":        []string{mockMetricSetName},
+					"period":            "100ms",
+					"hosts":             []string{"testhost"},
+					failureThresholdKey: 3,
+				}),
+				setup: func(t *testing.T, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					// fetcher will error out 2 times in a row
+					fetcher.On("Fetch", fetchCtx, pushReporter).Return(fetchError).Times(2)
+					// fetcher will then succeed once
+					fetcher.On("Fetch", fetchCtx, pushReporter).Return(nil).Once()
+					// fetcher will error out 3 more times in a row
+					fetcher.On("Fetch", fetchCtx, pushReporter).Return(fetchError).Times(3)
+
+					// expect the error to be propagated via the pushReporter at every failing iteration
+					pushReporter.On("Error", fetchError).Return(true).Times(5)
+					// expect the status running to be set when there's no error returned by the fetcher at the 3rd iteration
+					statusReporter.On("UpdateStatus", status.Running, mock.AnythingOfType("string")).Once()
+					// expect the status degraded to be set only once
+					statusReporter.On("UpdateStatus", status.Degraded, mock.AnythingOfType("string")).Once()
+				},
+				iterations: 6,
+				assertIteration: func(t *testing.T, i int, msWrapper *metricSetWrapper, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					t.Logf("Assertion after iteration %d", i)
+					switch {
+					case i < 2:
+						assert.Truef(t, statusReporter.AssertNotCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream degraded at iteration %d", i)
+					case i >= 2 && i < 5:
+						assert.Truef(t, statusReporter.AssertNotCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream degraded at iteration %d", i)
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Running, mock.AnythingOfType("string")), "stream not set to running at iteration %d", i)
+					case i == 5:
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream not yet degraded at iteration %d", i)
+					}
+				},
+			},
+			{
+				name: "failure_threshold = 0: stream status update never become DEGRADED",
+				config: newConfig(t, map[string]interface{}{
+					"module":            mockModuleName,
+					"metricsets":        []string{mockMetricSetName},
+					"period":            "100ms",
+					"hosts":             []string{"testhost"},
+					failureThresholdKey: 0,
+				}),
+				setup: func(t *testing.T, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					// fetcher will error out 9 times in a row
+					fetcher.On("Fetch", fetchCtx, pushReporter).Return(fetchError).Times(9)
+					// fetcher will then succeed once
+					fetcher.On("Fetch", fetchCtx, pushReporter).Return(nil).Once()
+
+					// expect the error to be propagated via the pushReporter at every failing iteration
+					pushReporter.On("Error", fetchError).Return(true).Times(9)
+					// expect the status running to be set when there's no error returned by the fetcher at the 10th iteration
+					statusReporter.On("UpdateStatus", status.Running, mock.AnythingOfType("string")).Once()
+				},
+				iterations: 10,
+				assertIteration: func(t *testing.T, i int, msWrapper *metricSetWrapper, fetcher *mockReportingFetcherWithContext, pushReporter *mockPushReporterV2, statusReporter *mockStatusReporter) {
+					t.Logf("Assertion after iteration %d", i)
+					switch {
+					case i < 9:
+						assert.Truef(t, statusReporter.AssertNotCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream degraded at iteration %d", i)
+					case i == 9:
+						assert.Truef(t, statusReporter.AssertNotCalled(t, "UpdateStatus", status.Degraded, mock.AnythingOfType("string")), "stream degraded at iteration %d", i)
+						assert.Truef(t, statusReporter.AssertCalled(t, "UpdateStatus", status.Running, mock.AnythingOfType("string")), "stream not set to running at iteration %d", i)
+					}
+				},
+			},
+		}
+
+		for _, tc := range testcases {
+			t.Run(tc.name, func(t *testing.T) {
+				// Setup mock push reporter
+				mpr := new(mockPushReporterV2)
+
+				// Setup mock fetcher
+				mrf := new(mockReportingFetcherWithContext)
+
+				// Setup mock StatusReporter
+				msr := new(mockStatusReporter)
+
+				// Setup mock reporter (ensure proper handling of intermediate calls, no functional value here)
+				mr := new(mockReporter)
+				mr.On("StartFetchTimer").Return()
+				mr.On("V2").Return(mpr)
+
+				// assert mocks expectations
+				t.Cleanup(func() {
+					mock.AssertExpectationsForObjects(t, mrf, mr, mpr, msr)
+				})
+
+				// setup mocks before starting the test
+				if tc.setup != nil {
+					tc.setup(t, mrf, mpr, msr)
+				}
+
+				// add metricset in registry
+				r := mb.NewRegister()
+				err := r.AddMetricSet(mockModuleName, mockMetricSetName, func(base mb.BaseMetricSet) (mb.MetricSet, error) {
+					mrf.BaseMetricSet = base
+					return mrf, nil
+				})
+				require.NoError(t, err)
+
+				aModule, metricSets, err := mb.NewModule(tc.config, r)
+				require.NoError(t, err)
+
+				// Set the mock status reporter
+				aModule.SetStatusReporter(msr)
+
+				moduleWrapper, err := NewWrapperForMetricSet(aModule, metricSets[0], WithMetricSetInfo())
+				require.NoError(t, err)
+
+				// run metricset synchronously
+				wrappedMetricSet := moduleWrapper.MetricSets()[0]
+
+				t.Cleanup(func() {
+					// release stats structure across testcases
+					releaseStats(wrappedMetricSet.stats)
+				})
+
+				for i := 0; i < tc.iterations; i++ {
+					// pass the same context value the fetcher mock was configured to expect
+					wrappedMetricSet.fetch(fetchCtx, mr)
+					if tc.assertIteration != nil {
+						tc.assertIteration(t, i, wrappedMetricSet, mrf, mpr, msr)
+					}
+				}
+			})
+		}
+	})
+}
+
+func newConfig(t testing.TB, moduleConfig interface{}) *conf.C {
+	cfg, cfgErr := conf.NewConfigFrom(moduleConfig)
+	require.NoError(t, cfgErr)
+	return cfg
+}