Skip to content

Commit

Permalink
receiver/prometheus: remove assumption that scraped metrics share a r…
Browse files Browse the repository at this point in the history
…esource (open-telemetry#36479)

<!--Ex. Fixing a bug - Describe the bug and how this fixes the issue.
Ex. Adding a feature - Explain what this achieves.-->
#### Description
This change removes as assumption that all metrics in a single scrape
come from the same resource. This is indeed not true when `honor_labels`
is set to `true` AND when the scraped metrics contain a `job` or
`instance` label.

<!-- Issue number (e.g. open-telemetry#1234) or full URL to issue, if applicable. -->
#### Link to tracking issue
Fixes open-telemetry#36477

<!--Describe what testing was performed and which tests were added.-->
#### Testing
Added unit tests

<!--Describe the documentation added.-->
#### Documentation
N/A

<!--Please delete paragraphs that you did not use before submitting.-->
  • Loading branch information
ridwanmsharif authored and sbylica-splunk committed Dec 17, 2024
1 parent 7e0c50a commit e3a2ab5
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 20 deletions.
27 changes: 27 additions & 0 deletions .chloggen/adjuster-resource-fix.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: receiver/prometheusreceiver

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Metric adjuster no longer assumes that all metrics from a scrape come from the same resource

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [36477]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: []
32 changes: 18 additions & 14 deletions receiver/prometheusreceiver/internal/metrics_adjuster.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,25 +255,28 @@ func NewInitialPointAdjuster(logger *zap.Logger, gcInterval time.Duration, useCr
// AdjustMetrics takes a sequence of metrics and adjust their start times based on the initial and
// previous points in the timeseriesMap.
func (a *initialPointAdjuster) AdjustMetrics(metrics pmetric.Metrics) error {
// By contract metrics will have at least 1 data point, so for sure will have at least one ResourceMetrics.

job, found := metrics.ResourceMetrics().At(0).Resource().Attributes().Get(semconv.AttributeServiceName)
if !found {
return errors.New("adjusting metrics without job")
}
for i := 0; i < metrics.ResourceMetrics().Len(); i++ {
rm := metrics.ResourceMetrics().At(i)
_, found := rm.Resource().Attributes().Get(semconv.AttributeServiceName)
if !found {
return errors.New("adjusting metrics without job")
}

instance, found := metrics.ResourceMetrics().At(0).Resource().Attributes().Get(semconv.AttributeServiceInstanceID)
if !found {
return errors.New("adjusting metrics without instance")
_, found = rm.Resource().Attributes().Get(semconv.AttributeServiceInstanceID)
if !found {
return errors.New("adjusting metrics without instance")
}
}
tsm := a.jobsMap.get(job.Str(), instance.Str())

// The lock on the relevant timeseriesMap is held throughout the adjustment process to ensure that
// nothing else can modify the data used for adjustment.
tsm.Lock()
defer tsm.Unlock()
for i := 0; i < metrics.ResourceMetrics().Len(); i++ {
rm := metrics.ResourceMetrics().At(i)
job, _ := rm.Resource().Attributes().Get(semconv.AttributeServiceName)
instance, _ := rm.Resource().Attributes().Get(semconv.AttributeServiceInstanceID)
tsm := a.jobsMap.get(job.Str(), instance.Str())

// The lock on the relevant timeseriesMap is held throughout the adjustment process to ensure that
// nothing else can modify the data used for adjustment.
tsm.Lock()
for j := 0; j < rm.ScopeMetrics().Len(); j++ {
ilm := rm.ScopeMetrics().At(j)
for k := 0; k < ilm.Metrics().Len(); k++ {
Expand Down Expand Up @@ -303,6 +306,7 @@ func (a *initialPointAdjuster) AdjustMetrics(metrics pmetric.Metrics) error {
}
}
}
tsm.Unlock()
}
return nil
}
Expand Down
62 changes: 56 additions & 6 deletions receiver/prometheusreceiver/internal/metrics_adjuster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ var (
percent0 = []float64{10, 50, 90}

sum1 = "sum1"
sum2 = "sum2"
gauge1 = "gauge1"
histogram1 = "histogram1"
summary1 = "summary1"
Expand Down Expand Up @@ -103,6 +104,37 @@ func TestSum(t *testing.T) {
runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script)
}

func TestSumWithDifferentResources(t *testing.T) {
script := []*metricsAdjusterTest{
{
description: "Sum: round 1 - initial instance, start time is established",
metrics: metricsFromResourceMetrics(resourceMetrics("job1", "instance1", sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44))), resourceMetrics("job2", "instance2", sumMetric(sum2, doublePoint(k1v1k2v2, t1, t1, 44)))),
adjusted: metricsFromResourceMetrics(resourceMetrics("job1", "instance1", sumMetric(sum1, doublePoint(k1v1k2v2, t1, t1, 44))), resourceMetrics("job2", "instance2", sumMetric(sum2, doublePoint(k1v1k2v2, t1, t1, 44)))),
},
{
description: "Sum: round 2 - instance adjusted based on round 1 (metrics in different order)",
metrics: metricsFromResourceMetrics(resourceMetrics("job2", "instance2", sumMetric(sum2, doublePoint(k1v1k2v2, t2, t2, 66))), resourceMetrics("job1", "instance1", sumMetric(sum1, doublePoint(k1v1k2v2, t2, t2, 66)))),
adjusted: metricsFromResourceMetrics(resourceMetrics("job2", "instance2", sumMetric(sum2, doublePoint(k1v1k2v2, t1, t2, 66))), resourceMetrics("job1", "instance1", sumMetric(sum1, doublePoint(k1v1k2v2, t1, t2, 66)))),
},
{
description: "Sum: round 3 - instance reset (value less than previous value), start time is reset",
metrics: metricsFromResourceMetrics(resourceMetrics("job1", "instance1", sumMetric(sum1, doublePoint(k1v1k2v2, t3, t3, 55))), resourceMetrics("job2", "instance2", sumMetric(sum2, doublePoint(k1v1k2v2, t3, t3, 55)))),
adjusted: metricsFromResourceMetrics(resourceMetrics("job1", "instance1", sumMetric(sum1, doublePoint(k1v1k2v2, t3, t3, 55))), resourceMetrics("job2", "instance2", sumMetric(sum2, doublePoint(k1v1k2v2, t3, t3, 55)))),
},
{
description: "Sum: round 4 - instance adjusted based on round 3",
metrics: metricsFromResourceMetrics(resourceMetrics("job1", "instance1", sumMetric(sum1, doublePoint(k1v1k2v2, t4, t4, 72))), resourceMetrics("job2", "instance2", sumMetric(sum2, doublePoint(k1v1k2v2, t4, t4, 72)))),
adjusted: metricsFromResourceMetrics(resourceMetrics("job1", "instance1", sumMetric(sum1, doublePoint(k1v1k2v2, t3, t4, 72))), resourceMetrics("job2", "instance2", sumMetric(sum2, doublePoint(k1v1k2v2, t3, t4, 72)))),
},
{
description: "Sum: round 5 - instance adjusted based on round 4, sum2 metric resets but sum1 doesn't",
metrics: metricsFromResourceMetrics(resourceMetrics("job1", "instance1", sumMetric(sum1, doublePoint(k1v1k2v2, t5, t5, 72))), resourceMetrics("job2", "instance2", sumMetric(sum2, doublePoint(k1v1k2v2, t5, t5, 10)))),
adjusted: metricsFromResourceMetrics(resourceMetrics("job1", "instance1", sumMetric(sum1, doublePoint(k1v1k2v2, t3, t5, 72))), resourceMetrics("job2", "instance2", sumMetric(sum2, doublePoint(k1v1k2v2, t5, t5, 10)))),
},
}
runScript(t, NewInitialPointAdjuster(zap.NewNop(), time.Minute, true), "job", "0", script)
}

func TestSummaryNoCount(t *testing.T) {
script := []*metricsAdjusterTest{
{
Expand Down Expand Up @@ -710,14 +742,32 @@ func runScript(t *testing.T, ma MetricsAdjuster, job, instance string, tests []*
t.Run(test.description, func(t *testing.T) {
adjusted := pmetric.NewMetrics()
test.metrics.CopyTo(adjusted)
// Add the instance/job to the input metrics.
adjusted.ResourceMetrics().At(0).Resource().Attributes().PutStr(semconv.AttributeServiceInstanceID, instance)
adjusted.ResourceMetrics().At(0).Resource().Attributes().PutStr(semconv.AttributeServiceName, job)
// Add the instance/job to the input metrics if they aren't already present.
for i := 0; i < adjusted.ResourceMetrics().Len(); i++ {
rm := adjusted.ResourceMetrics().At(i)
_, found := rm.Resource().Attributes().Get(semconv.AttributeServiceName)
if !found {
rm.Resource().Attributes().PutStr(semconv.AttributeServiceName, job)
}
_, found = rm.Resource().Attributes().Get(semconv.AttributeServiceInstanceID)
if !found {
rm.Resource().Attributes().PutStr(semconv.AttributeServiceInstanceID, instance)
}
}
assert.NoError(t, ma.AdjustMetrics(adjusted))

// Add the instance/job to the expected metrics as well.
test.adjusted.ResourceMetrics().At(0).Resource().Attributes().PutStr(semconv.AttributeServiceInstanceID, instance)
test.adjusted.ResourceMetrics().At(0).Resource().Attributes().PutStr(semconv.AttributeServiceName, job)
// Add the instance/job to the expected metrics as well if they aren't already present.
for i := 0; i < test.adjusted.ResourceMetrics().Len(); i++ {
rm := test.adjusted.ResourceMetrics().At(i)
_, found := rm.Resource().Attributes().Get(semconv.AttributeServiceName)
if !found {
rm.Resource().Attributes().PutStr(semconv.AttributeServiceName, job)
}
_, found = rm.Resource().Attributes().Get(semconv.AttributeServiceInstanceID)
if !found {
rm.Resource().Attributes().PutStr(semconv.AttributeServiceInstanceID, instance)
}
}
assert.EqualValues(t, test.adjusted, adjusted)
})
}
Expand Down
23 changes: 23 additions & 0 deletions receiver/prometheusreceiver/internal/metricsutil_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package internal
import (
"go.opentelemetry.io/collector/pdata/pcommon"
"go.opentelemetry.io/collector/pdata/pmetric"
semconv "go.opentelemetry.io/collector/semconv/v1.27.0"
)

type kv struct {
Expand All @@ -23,6 +24,28 @@ func metrics(metrics ...pmetric.Metric) pmetric.Metrics {
return md
}

func metricsFromResourceMetrics(metrics ...pmetric.ResourceMetrics) pmetric.Metrics {
md := pmetric.NewMetrics()
for _, metric := range metrics {
mr := md.ResourceMetrics().AppendEmpty()
metric.CopyTo(mr)
}
return md
}

func resourceMetrics(job, instance string, metrics ...pmetric.Metric) pmetric.ResourceMetrics {
mr := pmetric.NewResourceMetrics()
mr.Resource().Attributes().PutStr(semconv.AttributeServiceName, job)
mr.Resource().Attributes().PutStr(semconv.AttributeServiceInstanceID, instance)
ms := mr.ScopeMetrics().AppendEmpty().Metrics()

for _, metric := range metrics {
destMetric := ms.AppendEmpty()
metric.CopyTo(destMetric)
}
return mr
}

func histogramPointRaw(attributes []*kv, startTimestamp, timestamp pcommon.Timestamp) pmetric.HistogramDataPoint {
hdp := pmetric.NewHistogramDataPoint()
hdp.SetStartTimestamp(startTimestamp)
Expand Down

0 comments on commit e3a2ab5

Please sign in to comment.