From b63033662c350a00fb5736b496b8f265c5b3a72f Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Tue, 10 Oct 2023 10:20:17 +0200 Subject: [PATCH 1/6] Increase metrics interval to 60s --- .../application/monitoring/v1_monitor.go | 15 +-- .../application/monitoring/v1_monitor_test.go | 92 +++++++++++++++++++ 2 files changed, 101 insertions(+), 6 deletions(-) create mode 100644 internal/pkg/agent/application/monitoring/v1_monitor_test.go diff --git a/internal/pkg/agent/application/monitoring/v1_monitor.go b/internal/pkg/agent/application/monitoring/v1_monitor.go index 5c3f94e3e6a..a161bf679f2 100644 --- a/internal/pkg/agent/application/monitoring/v1_monitor.go +++ b/internal/pkg/agent/application/monitoring/v1_monitor.go @@ -12,6 +12,7 @@ import ( "path/filepath" "runtime" "strings" + "time" "unicode" "github.com/elastic/elastic-agent/pkg/component" @@ -517,6 +518,8 @@ func (b *BeatsMonitor) monitoringNamespace() string { } func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentIDToBinary map[string]string, monitoringOutputName string, componentList []component.Component) error { + metricsCollectionInterval := 60 * time.Second + metricsCollectionIntervalString := metricsCollectionInterval.String() monitoringNamespace := b.monitoringNamespace() fixedAgentName := strings.ReplaceAll(agentName, "-", "_") beatsStreams := make([]interface{}, 0, len(componentIDToBinary)) @@ -532,7 +535,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI "path": "/stats", "hosts": []interface{}{HttpPlusAgentMonitoringEndpoint(b.operatingSystem, b.config.C)}, "namespace": "agent", - "period": "10s", + "period": metricsCollectionIntervalString, "index": fmt.Sprintf("metrics-elastic_agent.%s-%s", fixedAgentName, monitoringNamespace), "processors": []interface{}{ map[string]interface{}{ @@ -608,7 +611,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI }, "metricsets": []interface{}{"stats", "state"}, "hosts": endpoints, - "period": "10s", + "period": metricsCollectionIntervalString, "index": fmt.Sprintf("metrics-elastic_agent.%s-%s", name, monitoringNamespace), "processors": []interface{}{ map[string]interface{}{ @@ -663,7 +666,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI "hosts": endpoints, "path": "/stats", "namespace": "agent", - "period": "10s", + "period": metricsCollectionIntervalString, "index": fmt.Sprintf("metrics-elastic_agent.%s-%s", fixedAgentName, monitoringNamespace), "processors": []interface{}{ map[string]interface{}{ @@ -725,7 +728,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI "path": "/inputs/", "namespace": fbDataStreamName, "json.is_array": true, - "period": "10s", + "period": metricsCollectionIntervalString, "index": fmt.Sprintf("metrics-elastic_agent.%s-%s", fbDataStreamName, monitoringNamespace), "processors": []interface{}{ map[string]interface{}{ @@ -799,7 +802,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI "path": "/shipper", "hosts": endpoints, "namespace": "application", - "period": "10s", + "period": metricsCollectionIntervalString, "processors": createProcessorsForJSONInput(name, monitoringNamespace, b.agentInfo), }, map[string]interface{}{ @@ -813,7 +816,7 @@ func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentI "path": "/stats", "hosts": endpoints, "namespace": "agent", - "period": "10s", + "period": metricsCollectionIntervalString, "processors": createProcessorsForJSONInput(name, monitoringNamespace, b.agentInfo), }) } diff --git a/internal/pkg/agent/application/monitoring/v1_monitor_test.go b/internal/pkg/agent/application/monitoring/v1_monitor_test.go new file mode 100644 index 00000000000..2bc71cddb1e --- /dev/null +++ b/internal/pkg/agent/application/monitoring/v1_monitor_test.go @@ -0,0 +1,92 @@ +package monitoring + +import ( + "context" + "runtime" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "gopkg.in/yaml.v2" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/info" + monitoringcfg "github.com/elastic/elastic-agent/internal/pkg/core/monitoring/config" +) + +func TestMonitoringConfigMetricsInterval(t *testing.T) { + + agentInfo, err := info.NewAgentInfo(context.Background(), false) + require.NoError(t, err, "Error creating agent info") + + mCfg := &monitoringConfig{ + C: &monitoringcfg.MonitoringConfig{ + Enabled: true, + MonitorMetrics: true, + HTTP: &monitoringcfg.MonitoringHTTPConfig{ + Enabled: false, + }, + }, + } + + policy := map[string]any{ + "agent": map[string]any{ + "monitoring": map[string]any{ + "metrics": true, + "http": map[string]any{ + "enabled": false, + }, + }, + }, + "outputs": map[string]any{ + "default": map[string]any{}, + }, + } + b := &BeatsMonitor{ + enabled: true, + config: mCfg, + operatingSystem: runtime.GOOS, + agentInfo: agentInfo, + } + got, err := b.MonitoringConfig(policy, nil, map[string]string{"foobeat": "filebeat"}) // put a componentID/binary mapping to have something in the beats monitoring input + assert.NoError(t, err) + + rawInputs, ok := got["inputs"] + require.True(t, ok, "monitoring config contains no input") + inputs, ok := rawInputs.([]any) + require.True(t, ok, "monitoring inputs are not a list") + marshaledInputs, err := yaml.Marshal(inputs) + if assert.NoError(t, err, "error marshaling monitoring inputs") { + t.Logf("marshaled monitoring inputs:\n%s\n", marshaledInputs) + } + + // loop over the created inputs + for _, i := range inputs { + input, ok := i.(map[string]any) + if assert.Truef(t, ok, "input is not represented as a map: %v", i) { + inputID := input["id"] + t.Logf("input %q", inputID) + // check the streams created for the input, should be a list of objects + if assert.Contains(t, input, "streams", "input %q does not contain any stream", inputID) && + assert.IsTypef(t, []any{}, input["streams"], "streams for input %q are not a list of objects", inputID) { + // loop over streams and cast to map[string]any to access keys + for _, rawStream := range input["streams"].([]any) { + if assert.IsTypef(t, map[string]any{}, rawStream, "stream %v for input %q is not a map", rawStream, inputID) { + stream := rawStream.(map[string]any) + // check period and assert its value + streamID := stream["id"] + if assert.Containsf(t, stream, "period", "stream %q for input %q does not contain a period", streamID, inputID) && + assert.IsType(t, "", stream["period"], "period for stream %q of input %q is not represented as a string", streamID, inputID) { + periodString := stream["period"].(string) + duration, err := time.ParseDuration(periodString) + if assert.NoErrorf(t, err, "Unparseable period duration %s for stream %q of input %q", periodString, streamID, inputID) { + assert.Equalf(t, duration, 60*time.Second, "unexpected duration for stream %q of input %q", streamID, inputID) + } + } + } + } + } + } + + } +} From 5b147b2bb6801196b66797ab2bd282c6073ff559 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Tue, 10 Oct 2023 18:06:34 +0200 Subject: [PATCH 2/6] fixup! Increase metrics interval to 60s --- internal/pkg/agent/application/monitoring/v1_monitor_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/internal/pkg/agent/application/monitoring/v1_monitor_test.go b/internal/pkg/agent/application/monitoring/v1_monitor_test.go index 2bc71cddb1e..9c7ea042d7f 100644 --- a/internal/pkg/agent/application/monitoring/v1_monitor_test.go +++ b/internal/pkg/agent/application/monitoring/v1_monitor_test.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + package monitoring import ( From 255a15ec10005db850a4d4c7d44467f3ce7a8098 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Tue, 10 Oct 2023 18:27:35 +0200 Subject: [PATCH 3/6] add changelog --- ...nt-monitoring-metrics-interval-to-60s.yaml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 changelog/fragments/1696955150-Slow-down-agent-monitoring-metrics-interval-to-60s.yaml diff --git a/changelog/fragments/1696955150-Slow-down-agent-monitoring-metrics-interval-to-60s.yaml b/changelog/fragments/1696955150-Slow-down-agent-monitoring-metrics-interval-to-60s.yaml new file mode 100644 index 00000000000..b220a0ab71a --- /dev/null +++ b/changelog/fragments/1696955150-Slow-down-agent-monitoring-metrics-interval-to-60s.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: feature + +# Change summary; a 80ish characters long description of the change. +summary: Increase agent monitoring metrics interval from 10s to 60s to reduce load + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; a word indicating the component this changeset affects. +component: monitoring + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +#issue: https://github.com/owner/repo/1234 From 80135324d204c2cbb21e44cfe329145ab2f64e90 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Wed, 11 Oct 2023 08:32:21 +0200 Subject: [PATCH 4/6] fixup! add changelog --- ...5150-Slow-down-agent-monitoring-metrics-interval-to-60s.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelog/fragments/1696955150-Slow-down-agent-monitoring-metrics-interval-to-60s.yaml b/changelog/fragments/1696955150-Slow-down-agent-monitoring-metrics-interval-to-60s.yaml index b220a0ab71a..bf86933d97e 100644 --- a/changelog/fragments/1696955150-Slow-down-agent-monitoring-metrics-interval-to-60s.yaml +++ b/changelog/fragments/1696955150-Slow-down-agent-monitoring-metrics-interval-to-60s.yaml @@ -8,7 +8,7 @@ # - security: impacts on the security of a product or a user’s deployment. # - upgrade: important information for someone upgrading from a prior version # - other: does not fit into any of the other categories -kind: feature +kind: enhancement # Change summary; a 80ish characters long description of the change. summary: Increase agent monitoring metrics interval from 10s to 60s to reduce load From 4303ee0e0b88bfdbe26d1a99360ee7f791a6ff3a Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Wed, 11 Oct 2023 08:32:42 +0200 Subject: [PATCH 5/6] fixup! fixup! Increase metrics interval to 60s --- internal/pkg/agent/application/monitoring/v1_monitor.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internal/pkg/agent/application/monitoring/v1_monitor.go b/internal/pkg/agent/application/monitoring/v1_monitor.go index a161bf679f2..3bbfb2b4fef 100644 --- a/internal/pkg/agent/application/monitoring/v1_monitor.go +++ b/internal/pkg/agent/application/monitoring/v1_monitor.go @@ -52,6 +52,9 @@ const ( agentName = "elastic-agent" windowsOS = "windows" + + // metricset execution period used for the monitoring metrics inputs + metricsCollectionInterval = 60 * time.Second ) var ( @@ -518,7 +521,7 @@ func (b *BeatsMonitor) monitoringNamespace() string { } func (b *BeatsMonitor) injectMetricsInput(cfg map[string]interface{}, componentIDToBinary map[string]string, monitoringOutputName string, componentList []component.Component) error { - metricsCollectionInterval := 60 * time.Second + metricsCollectionIntervalString := metricsCollectionInterval.String() monitoringNamespace := b.monitoringNamespace() fixedAgentName := strings.ReplaceAll(agentName, "-", "_") From 4d71ff6642e2586048e98e44921c42a900bdf9d9 Mon Sep 17 00:00:00 2001 From: Paolo Chila Date: Wed, 11 Oct 2023 16:29:31 +0200 Subject: [PATCH 6/6] fixup! fixup! fixup! Increase metrics interval to 60s --- internal/pkg/agent/application/monitoring/v1_monitor.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/pkg/agent/application/monitoring/v1_monitor.go b/internal/pkg/agent/application/monitoring/v1_monitor.go index 3bbfb2b4fef..79522096b8c 100644 --- a/internal/pkg/agent/application/monitoring/v1_monitor.go +++ b/internal/pkg/agent/application/monitoring/v1_monitor.go @@ -54,6 +54,7 @@ const ( windowsOS = "windows" // metricset execution period used for the monitoring metrics inputs + // we set this to 60s to reduce the load/data volume on the monitoring cluster metricsCollectionInterval = 60 * time.Second )