Skip to content

Commit

Permalink
Collect metric values after they're all available
Browse files Browse the repository at this point in the history
The metricset now collects metrics values using the largest ingest
delay interval instead of the individual shortest ingest.

By using the largest ingest delay, the metricset gets all the metrics
values for each data point in a single collection.

Drop the `gcp.metric_names_fingerprint` because it's no longer needed.
  • Loading branch information
zmoog committed Nov 2, 2023
1 parent d36f7a0 commit f655e50
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 76 deletions.
10 changes: 0 additions & 10 deletions metricbeat/docs/fields.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -35303,16 +35303,6 @@ GCP module



*`gcp.metric_names_fingerprint`*::
+
--
The SHA-256 hash of the comma-separated list metric names collected in the batch. For example, l3.external.ingress_packets.count,l3.external.ingress.bytes > 24848afd85b65b9e168fa5dee12bd3e121c58a504cc0b4386a15bd9bcd065a5d. Required to support TSDB.


type: keyword

--

*`gcp.labels`*::
+
--
Expand Down
4 changes: 0 additions & 4 deletions x-pack/metricbeat/module/gcp/_meta/fields.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,6 @@
- name: gcp
type: group
fields:
- name: metric_names_fingerprint
type: keyword
description: >
The SHA-256 hash of the comma-separated list metric names collected in the batch. For example, l3.external.ingress_packets.count,l3.external.ingress.bytes > 24848afd85b65b9e168fa5dee12bd3e121c58a504cc0b4386a15bd9bcd065a5d. Required to support TSDB.
- name: labels
type: object
description: >
Expand Down
2 changes: 1 addition & 1 deletion x-pack/metricbeat/module/gcp/fields.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

48 changes: 47 additions & 1 deletion x-pack/metricbeat/module/gcp/metrics/metrics_requester.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,52 @@ func (r *metricsRequester) Metrics(ctx context.Context, serviceName string, alig
var wg sync.WaitGroup
results := make([]timeSeriesWithAligner, 0)

// Find the largest delay in the metrics to collect.
//
// Why do we need find the largest ingest delay in the metrics to collect?
// ======================================================================
//
// We need to share some context first.
//
// Context
// -------
//
// GCP metrics have different ingestion delays; some metrics have zero delay,
// while others have a non-zero delay of up to a few minutes.
//
// For example,
// - `container/memory.limit.bytes` has no ingest delay.
// - `container/memory/request_bytes` has two minutes ingest delay.
//
// Since the metricset collects metrics every 60 seconds, it ends up
// collecting `container/memory.limit.bytes` and `container/memory/request_bytes`
// in different iterations; it stores metrics values in different documents,
// even when they are related to the same timestamp.
//
// Problem
// -------
//
// When TSDB is enabled, two documents cannot have the same timestamp and dimensions.
// If they do, the second document is dropped.
//
// Unfortunately, this is exactly what happens when the metricset collects
// `container/memory.limit.bytes` and `container/memory/request_bytes` in different
// iterations.
//
// Solution
// --------
//
// We calculate the largest delay, and then we collect the metrics values only when
// they are all available.
//
largestDelay := 0 * time.Second
for _, meta := range metricsToCollect {
metricMeta := meta
if meta.ingestDelay > largestDelay {
largestDelay = metricMeta.ingestDelay
}
}

for mt, meta := range metricsToCollect {
wg.Add(1)

Expand All @@ -87,7 +133,7 @@ func (r *metricsRequester) Metrics(ctx context.Context, serviceName string, alig
defer wg.Done()

r.logger.Debugf("For metricType %s, metricMeta = %d, aligner = %s", mt, metricMeta, aligner)
interval, aligner := getTimeIntervalAligner(metricMeta.ingestDelay, metricMeta.samplePeriod, r.config.period, aligner)
interval, aligner := getTimeIntervalAligner(largestDelay, metricMeta.samplePeriod, r.config.period, aligner)
ts := r.Metric(ctx, serviceName, mt, interval, aligner)
lock.Lock()
defer lock.Unlock()
Expand Down
57 changes: 0 additions & 57 deletions x-pack/metricbeat/module/gcp/metrics/timeseries.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
"strings"

"github.com/elastic/beats/v7/metricbeat/mb"
"github.com/elastic/beats/v7/x-pack/metricbeat/module/gcp"
Expand Down Expand Up @@ -89,51 +88,9 @@ func createEventsFromGroups(service string, groups map[string][]KeyValuePoint) [
MetricSetFields: mapstr.M{},
}

// Collect the metric names in the event and add them to the event
// as `event.metric_names` field.
//
// Why do we need keep track of all the metric names in the event?
// ===============================================================
//
// Context
// -------
//
// GCP metrics have different ingestion delays; some metrics have zero delay,
// while others have a non-zero delay of up to a few minutes.
//
// For example,
// - `container/memory.limit.bytes` has no ingest delay, while
// - `container/memory/request_bytes` has two minutes ingest delay.
//
// Since the metricset collects metrics every 60 seconds, the metricset collects
// `container/memory.limit.bytes` and `container/memory/request_bytes`
// in different iterations, even if they have the same timestamp.
//
// Problem
// -------
//
// When TSDB is enabled, two documents cannot have the same timestamp and dimensions.
// If they do, the second document is dropped.
//
// Unfortunately, this is exactly what happens when the metricset collects
// `container/memory.limit.bytes` and `container/memory/request_bytes` in different
// iterations.
//
// Solution
// --------
//
// Since the metricset collects different metrics in different iterations, we need
// to add an `event.metric_names` field to make sure that the events have different
// dimensions.
//
metricNames := []string{}

for _, singleEvent := range group {
// Add the metric values to the event.
_, _ = event.MetricSetFields.Put(singleEvent.Key, singleEvent.Value)

// Add the metric name to build the `event.metric_names` field.
metricNames = append(metricNames, singleEvent.Key)
}

if service == "compute" {
Expand All @@ -142,13 +99,6 @@ func createEventsFromGroups(service string, groups map[string][]KeyValuePoint) [
event.RootFields = group[0].ECS
}

// Hashes metric names string using SHA-256 to always have
// a constant length value and avoid overflowing the
// current TSDB dimension field limit (1024).
metricNamesHash := hash(strings.Join(metricNames, ","))

_, _ = event.ModuleFields.Put("metric_names_fingerprint", metricNamesHash)

events = append(events, event)
}

Expand Down Expand Up @@ -195,10 +145,3 @@ func (m *MetricSet) groupTimeSeries(ctx context.Context, timeSeries []timeSeries

return groupedMetrics
}

// hash return the SHA-256 hash of the input string.
func hash(s string) string {
h := sha256.New()
h.Write([]byte(s))
return hex.EncodeToString(h.Sum(nil))
}
3 changes: 0 additions & 3 deletions x-pack/metricbeat/module/gcp/metrics/timeseries_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -453,7 +453,6 @@ func TestCreateEventsFromGroup(t *testing.T) {
{
Timestamp: timestampGroup1,
ModuleFields: mapstr.M{
"metric_names_fingerprint": "2a990d0ce61c177d3bc05def9f536de164d3ea10056b7120414ad370a0f686c0",
"labels": mapstr.M{
"user.deployment": "deploy-1",
"user.division": "div-1",
Expand All @@ -480,7 +479,6 @@ func TestCreateEventsFromGroup(t *testing.T) {
{
Timestamp: timestampGroup2,
ModuleFields: mapstr.M{
"metric_names_fingerprint": "f7c8d8a9a75411effbd2171a8fa984a5e618eeca3911829b03c7e3e3d6e74522",
"labels": mapstr.M{
"user.deployment": "deploy-1",
"user.division": "div-1",
Expand All @@ -507,7 +505,6 @@ func TestCreateEventsFromGroup(t *testing.T) {
{
Timestamp: timestampGroup3,
ModuleFields: mapstr.M{
"metric_names_fingerprint": "ce7dee36d9ab56de52176bf05d21c0bc446da9a3134727bf5e300604f2ca63ef",
"labels": mapstr.M{
"user.deployment": "deploy-1",
"user.division": "div-1",
Expand Down

0 comments on commit f655e50

Please sign in to comment.