[receiver/windowsperfcounters] Returning partial errors for failures (open-telemetry#32370)

Returning partial errors for failures during scraping to prevent
throwing out all successfully retrieved metrics.

**Description:** Fixes a bug where a single failing instance of a Windows
performance object prevented metrics for all other valid instances from
being published.
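
For context, a minimal sketch of the partial-error pattern this change adopts: accumulate per-watcher errors and wrap them in a `scrapererror.PartialScrapeError` only when some (not all) watchers fail, so the successfully scraped values are still returned. The `scrapeAll` function and its watcher closures are hypothetical stand-ins; only the `scrapererror` and `multierr` calls mirror the real packages.

```go
// Sketch only: one failing watcher is recorded as a partial error instead of
// discarding the values that were scraped successfully.
package main

import (
	"errors"
	"fmt"

	"go.opentelemetry.io/collector/receiver/scrapererror"
	"go.uber.org/multierr"
)

func scrapeAll(watchers []func() (float64, error)) ([]float64, error) {
	var errs error
	var vals []float64
	failed := 0
	for _, scrape := range watchers {
		v, err := scrape()
		if err != nil {
			errs = multierr.Append(errs, err)
			failed++
			continue // keep going; other watchers may still succeed
		}
		vals = append(vals, v)
	}
	// Only some watchers failed: wrap the errors so callers know the
	// returned values are usable but incomplete.
	if failed != 0 && failed != len(watchers) {
		return vals, scrapererror.NewPartialScrapeError(errs, failed)
	}
	return vals, errs
}

func main() {
	vals, err := scrapeAll([]func() (float64, error){
		func() (float64, error) { return 1.0, nil },
		func() (float64, error) { return 0, errors.New("counter unavailable") },
	})
	fmt.Println(vals, err) // [1] counter unavailable
}
```

The actual change applies the same guard inside the receiver's `scrape` method, as shown in the scraper.go hunks below.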

**Link to tracking Issue:**
[16712](open-telemetry#16712)

**Testing:** I added tests that verify that when some scrapers fail to
retrieve metrics, the ones that don't fail continue to publish their
metrics.

**Documentation:**
chrislbs authored Apr 26, 2024
1 parent b7d2395 commit 21a87ff
Showing 3 changed files with 119 additions and 12 deletions.
27 changes: 27 additions & 0 deletions .chloggen/feature_win-perf-receiver-partial-errors.yaml
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'enhancement'

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: windowsperfcountersreceiver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Returns partial errors for failures during scraping to prevent throwing out all successfully retrieved metrics

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [16712]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
receiver/windowsperfcountersreceiver/scraper.go
@@ -12,6 +12,7 @@ import (
"go.opentelemetry.io/collector/component"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/pmetric"
"go.opentelemetry.io/collector/receiver/scrapererror"
"go.uber.org/multierr"
"go.uber.org/zap"

@@ -124,10 +125,12 @@ func (s *scraper) scrape(context.Context) (pmetric.Metrics, error) {
metrics[name] = builtMetric
}

scrapeFailures := 0
for _, watcher := range s.watchers {
counterVals, err := watcher.ScrapeData()
if err != nil {
errs = multierr.Append(errs, err)
scrapeFailures += 1
continue
}

@@ -145,6 +148,9 @@ func (s *scraper) scrape(context.Context) (pmetric.Metrics, error) {
initializeMetricDps(metric, now, val, watcher.MetricRep.Attributes)
}
}
if scrapeFailures != 0 && scrapeFailures != len(s.watchers) {
errs = scrapererror.NewPartialScrapeError(errs, scrapeFailures)
}
return md, errs
}
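
With this change, a caller of `scrape` can tell a partial failure apart from a total one and still forward the metrics returned alongside the error. A hedged sketch of such caller-side handling follows; the `consumeScrape` helper and `sink` callback are hypothetical and not part of this change, and the snippet assumes imports of `context`, `log`, `go.opentelemetry.io/collector/pdata/pmetric`, and `go.opentelemetry.io/collector/receiver/scrapererror`. Only the `scrapererror` usage mirrors the real API, which the tests below also exercise.

```go
// Hypothetical caller-side handling; not part of this change.
func consumeScrape(ctx context.Context, s *scraper, sink func(pmetric.Metrics)) error {
	md, err := s.scrape(ctx)
	if err != nil && !scrapererror.IsPartialScrapeError(err) {
		return err // total failure: nothing usable was scraped
	}
	if err != nil {
		partial := err.(scrapererror.PartialScrapeError)
		log.Printf("partial scrape: %d watcher(s) failed: %v", partial.Failed, err)
	}
	sink(md) // md still contains metrics from the watchers that succeeded
	return nil
}
```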

receiver/windowsperfcountersreceiver/scraper_test.go
@@ -7,6 +7,8 @@ package windowsperfcountersreceiver

import (
"context"
"errors"
"fmt"
"path/filepath"
"testing"
"time"
@@ -16,7 +18,9 @@ import (
"go.opentelemetry.io/collector/component/componenttest"
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/pmetric"
"go.opentelemetry.io/collector/receiver/scrapererror"
"go.opentelemetry.io/collector/receiver/scraperhelper"
"go.uber.org/multierr"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
"go.uber.org/zap/zaptest/observer"
@@ -45,8 +49,15 @@ func (w *mockPerfCounter) Close() error {
return w.closeErr
}

func mockPerfCounterFactory(mpc mockPerfCounter) newWatcherFunc {
func mockPerfCounterFactoryInvocations(mpcs ...mockPerfCounter) newWatcherFunc {
invocationNum := 0
return func(string, string, string) (winperfcounters.PerfCounterWatcher, error) {
if invocationNum == len(mpcs) {
return nil, fmt.Errorf("invoked watcher %d times but only %d were setup", invocationNum+1, len(mpcs))
}
mpc := mpcs[invocationNum]
invocationNum += 1

return &mpc, nil
}
}
@@ -274,9 +285,9 @@ func TestInitWatchers(t *testing.T) {

func TestScrape(t *testing.T) {
testCases := []struct {
name string
cfg Config
mockCounterValues []winperfcounters.CounterValue
name string
cfg Config
mockPerfCounters []mockPerfCounter
}{
{
name: "metricsWithoutInstance",
@@ -305,7 +316,10 @@ func TestScrape(t *testing.T) {
"metric2": {Description: "metric2 description", Unit: "2"},
},
},
mockCounterValues: []winperfcounters.CounterValue{{Value: 1.0}},
mockPerfCounters: []mockPerfCounter{
{counterValues: []winperfcounters.CounterValue{{Value: 1.0}}},
{counterValues: []winperfcounters.CounterValue{{Value: 2.0}}},
},
},
{
name: "metricsWithInstance",
@@ -334,19 +348,76 @@ func TestScrape(t *testing.T) {
"metric2": {Description: "metric2 description", Unit: "2"},
},
},
mockCounterValues: []winperfcounters.CounterValue{{InstanceName: "Test Instance", Value: 1.0}},
mockPerfCounters: []mockPerfCounter{
{counterValues: []winperfcounters.CounterValue{{InstanceName: "Test Instance", Value: 1.0}}},
{counterValues: []winperfcounters.CounterValue{{InstanceName: "Test Instance", Value: 2.0}}},
},
},
{
name: "metricsWithSingleCounterFailure",
cfg: Config{
PerfCounters: []ObjectConfig{
{
Counters: []CounterConfig{
{
MetricRep: MetricRep{
Name: "metric1",
},
},
{
MetricRep: MetricRep{
Name: "metric2",
Attributes: map[string]string{
"test.attribute": "test-value",
},
},
},
{
MetricRep: MetricRep{
Name: "metric3",
},
},
},
},
},
MetricMetaData: map[string]MetricConfig{
"metric1": {Description: "metric1 description", Unit: "1"},
"metric2": {Description: "metric2 description", Unit: "2"},
"metric3": {Description: "metric3 description", Unit: "3"},
},
},
mockPerfCounters: []mockPerfCounter{
{counterValues: []winperfcounters.CounterValue{{InstanceName: "Test Instance", Value: 1.0}}},
{scrapeErr: errors.New("unable to scrape metric 2")},
{scrapeErr: errors.New("unable to scrape metric 3")},
},
},
}
for _, test := range testCases {
t.Run(test.name, func(t *testing.T) {
mpc := mockPerfCounter{counterValues: test.mockCounterValues}
mpcs := test.mockPerfCounters
testConfig := test.cfg
s := &scraper{cfg: &testConfig, newWatcher: mockPerfCounterFactory(mpc)}
s := &scraper{cfg: &testConfig, newWatcher: mockPerfCounterFactoryInvocations(mpcs...)}
errs := s.start(context.Background(), componenttest.NewNopHost())
require.NoError(t, errs)

var expectedErrors []error
for _, mpc := range test.mockPerfCounters {
if mpc.scrapeErr != nil {
expectedErrors = append(expectedErrors, mpc.scrapeErr)
}
}

m, err := s.scrape(context.Background())
require.NoError(t, err)
if len(expectedErrors) != 0 {
require.IsType(t, scrapererror.PartialScrapeError{}, err)
partialErr := err.(scrapererror.PartialScrapeError)
require.Equal(t, len(expectedErrors), partialErr.Failed)
expectedError := multierr.Combine(expectedErrors...)
require.Equal(t, expectedError.Error(), partialErr.Error())
} else {
require.NoError(t, err)
}
require.Equal(t, 1, m.ResourceMetrics().Len())
require.Equal(t, 1, m.ResourceMetrics().At(0).ScopeMetrics().Len())

@@ -356,15 +427,18 @@ func TestScrape(t *testing.T) {
})
curMetricsNum := 0
for _, pc := range test.cfg.PerfCounters {
for _, counterCfg := range pc.Counters {

for counterIdx, counterCfg := range pc.Counters {
metric := metrics.At(curMetricsNum)
assert.Equal(t, counterCfg.MetricRep.Name, metric.Name())
metricData := test.cfg.MetricMetaData[counterCfg.MetricRep.Name]
assert.Equal(t, metricData.Description, metric.Description())
assert.Equal(t, metricData.Unit, metric.Unit())
dps := metric.Gauge().DataPoints()
assert.Equal(t, len(test.mockCounterValues), dps.Len())
for dpIdx, val := range test.mockCounterValues {

counterValues := test.mockPerfCounters[counterIdx].counterValues
assert.Equal(t, len(counterValues), dps.Len())
for dpIdx, val := range counterValues {
assert.Equal(t, val.Value, dps.At(dpIdx).DoubleValue())
expectedAttributeLen := len(counterCfg.MetricRep.Attributes)
if val.InstanceName != "" {
