Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HA Agent] Add datadog.agent.ha_agent.integration_runs #33233

Merged
merged 31 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
6203116
Add config version tag in metric
coignetp Jan 17, 2025
027ba6a
Use fleet policies dir instead
coignetp Jan 17, 2025
b5634e7
Cleaner
coignetp Jan 17, 2025
7e52c66
Fix
coignetp Jan 17, 2025
b198953
Rename
coignetp Jan 17, 2025
bf0993b
Change to configid
coignetp Jan 20, 2025
3c0593c
Fixed tag
coignetp Jan 21, 2025
609e43c
[HA Agent] add config_id tag to datadog.agent.ha_agent.running metric
AlexandreYang Jan 22, 2025
b772211
Revert "Revert "update e2e test to assert config_id""
AlexandreYang Jan 22, 2025
6a06cd3
rename agent_state to ha_agent_state
AlexandreYang Jan 22, 2025
e66ec05
add config_id tag
AlexandreYang Jan 23, 2025
28d02d4
fix test
AlexandreYang Jan 23, 2025
a1e484a
[HA Agent] add integration metrics
AlexandreYang Jan 22, 2025
c2f69ca
refactor
AlexandreYang Jan 23, 2025
0e99be9
revert pkg/collector/worker/worker_test.go
AlexandreYang Jan 23, 2025
8e41da0
move to stats.go
AlexandreYang Jan 24, 2025
d627206
sort import
AlexandreYang Jan 24, 2025
8d28957
fix guiimpl/checks.go
AlexandreYang Jan 24, 2025
c4f766e
fix pkg/cli/subcommands/check/command.go
AlexandreYang Jan 24, 2025
0a62fb6
update to config_id
AlexandreYang Jan 24, 2025
3e5b66e
Address reviews
coignetp Jan 28, 2025
acd7ada
Update aggregator.go
coignetp Jan 30, 2025
6a75ed5
Update config.go
coignetp Jan 31, 2025
b5a9cb7
Update config.go
coignetp Jan 31, 2025
52a18fc
remove fixed comment
AlexandreYang Jan 31, 2025
9b7689f
Merge branch 'main' into paul/fa-add-tag-config-v
AlexandreYang Feb 1, 2025
072f212
Merge branch 'paul/fa-add-tag-config-v' into NDMII-3313-datadog-agent…
AlexandreYang Feb 1, 2025
ed96e97
Merge branch 'NDMII-3313-datadog-agent-ha-agent-running' into NDMII-3…
AlexandreYang Feb 1, 2025
e18aa87
Merge branch 'main' into NDMII-3313-datadog-agent-ha-agent-running
AlexandreYang Feb 1, 2025
050dfa0
Merge branch 'NDMII-3313-datadog-agent-ha-agent-running' into NDMII-3…
AlexandreYang Feb 1, 2025
b5001c3
Merge branch 'main' into NDMII-3313-integration-metric
AlexandreYang Feb 1, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion comp/core/gui/guiimpl/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func runCheckOnce(w http.ResponseWriter, r *http.Request, ac autodiscovery.Compo
err := ch.Run()
warnings := ch.GetWarnings()
sStats, _ := ch.GetSenderStats()
s.Add(time.Since(t0), err, warnings, sStats)
s.Add(time.Since(t0), err, warnings, sStats, nil)

// Without a small delay some of the metrics will not show up
time.Sleep(100 * time.Millisecond)
Expand Down
3 changes: 3 additions & 0 deletions comp/haagent/def/component.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,7 @@ type Component interface {

// ShouldRunIntegration returns true if the integration should be run
ShouldRunIntegration(integrationName string) bool

// IsHaIntegration return true if it's an HA integration.
IsHaIntegration(integrationName string) bool
}
5 changes: 5 additions & 0 deletions comp/haagent/impl/haagent.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,11 @@ func (h *haAgentImpl) ShouldRunIntegration(integrationName string) bool {
return true
}

// IsHaIntegration return true if it's an HA integration.
func (h *haAgentImpl) IsHaIntegration(integrationName string) bool {
return validHaIntegrations[integrationName]
}

func (h *haAgentImpl) onHaAgentUpdate(updates map[string]state.RawConfig, applyStateCallback func(string, state.ApplyStatus)) {
h.log.Debugf("Updates received: count=%d", len(updates))

Expand Down
37 changes: 37 additions & 0 deletions comp/haagent/impl/haagent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,43 @@ func Test_haAgentImpl_ShouldRunIntegration(t *testing.T) {
}
}

func Test_haAgentImpl_IsIntegration(t *testing.T) {
testAgentHostname := "my-agent-hostname"
tests := []struct {
name string
leader string
agentConfigs map[string]interface{}
expectIsHaIntegration map[string]bool
}{
{
name: "base case",
// should run HA-integrations
// should run "non HA integrations"
agentConfigs: map[string]interface{}{
"hostname": testAgentHostname,
},
leader: testAgentHostname,
expectIsHaIntegration: map[string]bool{
"snmp": true,
"cisco_aci": true,
"cisco_sdwan": true,
"network_path": true,
"unknown_integration": false,
"cpu": false,
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
haAgent := newTestHaAgentComponent(t, tt.agentConfigs)

for integrationName, shouldRun := range tt.expectIsHaIntegration {
assert.Equalf(t, shouldRun, haAgent.Comp.IsHaIntegration(integrationName), "fail for integration: "+integrationName)
}
})
}
}

func Test_haAgentImpl_resetAgentState(t *testing.T) {
// GIVEN
haAgent := newTestHaAgentComponent(t, nil)
Expand Down
4 changes: 4 additions & 0 deletions comp/haagent/mock/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ func (m *mockHaAgent) ShouldRunIntegration(_ string) bool {
return true
}

func (m *mockHaAgent) IsHaIntegration(_ string) bool {
return true
}

// Component is the component type.
type Component interface {
haagent.Component
Expand Down
2 changes: 1 addition & 1 deletion pkg/cli/subcommands/check/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,7 @@ func runCheck(cliParams *cliParams, c check.Check, _ aggregator.Demultiplexer) *
err := c.Run()
warnings := c.GetWarnings()
sStats, _ := c.GetSenderStats()
s.Add(time.Since(t0), err, warnings, sStats)
s.Add(time.Since(t0), err, warnings, sStats, nil)
if pause > 0 && i < times-1 {
time.Sleep(time.Duration(pause) * time.Millisecond)
}
Expand Down
13 changes: 12 additions & 1 deletion pkg/collector/check/stats/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"github.com/mitchellh/mapstructure"

haagent "github.com/DataDog/datadog-agent/comp/haagent/def"
checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id"
pkgconfigsetup "github.com/DataDog/datadog-agent/pkg/config/setup"
"github.com/DataDog/datadog-agent/pkg/config/utils"
Expand Down Expand Up @@ -55,6 +56,13 @@ var (
"delay",
[]string{"check_name"},
"Check start time delay relative to the previous check run")
tlmHaAgentIntegrationRuns = telemetry.NewCounterWithOpts(
"ha_agent",
"integration_runs",
[]string{"integration", "config_id"},
"Tracks number of HA integrations runs.",
telemetry.Options{DefaultMetric: true},
)
)

// SenderStats contains statistics showing the count of various types of telemetry sent by a check sender
Expand Down Expand Up @@ -161,7 +169,7 @@ func NewStats(c StatsCheck) *Stats {
}

// Add tracks a new execution time
func (cs *Stats) Add(t time.Duration, err error, warnings []error, metricStats SenderStats) {
func (cs *Stats) Add(t time.Duration, err error, warnings []error, metricStats SenderStats, haagent haagent.Component) {
cs.m.Lock()
defer cs.m.Unlock()

Expand Down Expand Up @@ -249,6 +257,9 @@ func (cs *Stats) Add(t time.Duration, err error, warnings []error, metricStats S
cs.TotalEventPlatformEvents[k] = cs.TotalEventPlatformEvents[k] + v
cs.EventPlatformEvents[k] = v
}
if haagent != nil && haagent.Enabled() && haagent.IsHaIntegration(cs.CheckName) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we could include the haagent.Enabled() check in IsHaIntegration to simplify usage ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hum, I think it's not ideal since they mean different things. Can be misleading to have haagent.Enabled() check in IsHaIntegration() since a check can be flagged as HA, but HA Agent feature can be disabled.

tlmHaAgentIntegrationRuns.Inc(cs.CheckName, pkgconfigsetup.Datadog().GetString("config_id"))
}
}

// SetStateCancelling sets the check stats to be in a cancelling state
Expand Down
7 changes: 4 additions & 3 deletions pkg/collector/runner/expvars/expvars.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

"github.com/mohae/deepcopy"

haagent "github.com/DataDog/datadog-agent/comp/haagent/def"
"github.com/DataDog/datadog-agent/pkg/collector/check"
checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id"
checkstats "github.com/DataDog/datadog-agent/pkg/collector/check/stats"
Expand Down Expand Up @@ -105,12 +106,12 @@ func GetCheckStats() map[string]map[checkid.ID]*checkstats.Stats {
}

// AddCheckStats adds runtime stats to the check's expvars
func AddCheckStats(
c check.Check,
func AddCheckStats(c check.Check,
execTime time.Duration,
err error,
warnings []error,
mStats checkstats.SenderStats,
haagent haagent.Component,
) {

var s *checkstats.Stats
Expand All @@ -133,7 +134,7 @@ func AddCheckStats(
stats[c.ID()] = s
}

s.Add(execTime, err, warnings, mStats)
s.Add(execTime, err, warnings, mStats, haagent)
}

// RemoveCheckStats removes a check from the check stats map
Expand Down
9 changes: 5 additions & 4 deletions pkg/collector/runner/expvars/expvars_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

haagentmock "github.com/DataDog/datadog-agent/comp/haagent/mock"
checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id"
"github.com/DataDog/datadog-agent/pkg/collector/check/stats"
"github.com/DataDog/datadog-agent/pkg/collector/check/stub"
Expand Down Expand Up @@ -171,7 +172,7 @@ func TestExpvarsReset(t *testing.T) {
testCheck := newTestCheck(checkID)

for runIdx := 0; runIdx < numCheckRuns; runIdx++ {
AddCheckStats(testCheck, 12345, nil, []error{}, stats.SenderStats{})
AddCheckStats(testCheck, 12345, nil, []error{}, stats.SenderStats{}, haagentmock.NewMockHaAgent())
}
}
}
Expand Down Expand Up @@ -243,7 +244,7 @@ func TestExpvarsCheckStats(t *testing.T) {

<-start

AddCheckStats(testCheck, duration, err, warnings, expectedStats)
AddCheckStats(testCheck, duration, err, warnings, expectedStats, haagentmock.NewMockHaAgent())

actualStats, found := CheckStats(testCheck.ID())
require.True(t, found)
Expand Down Expand Up @@ -319,7 +320,7 @@ func TestExpvarsGetChecksStatsClone(t *testing.T) {
testCheck := newTestCheck(checkID)

for runIdx := 0; runIdx < numCheckRuns; runIdx++ {
AddCheckStats(testCheck, 12345, nil, []error{}, stats.SenderStats{})
AddCheckStats(testCheck, 12345, nil, []error{}, stats.SenderStats{}, haagentmock.NewMockHaAgent())
}
}
}
Expand Down Expand Up @@ -424,7 +425,7 @@ func TestGetCheckStatsRace(t *testing.T) {

warnings := []error{errors.New("error1"), errors.New("error2"), errors.New("error3")}
for runIdx := 0; runIdx < numCheckRuns; runIdx++ {
AddCheckStats(testCheck, 12345, nil, warnings, stats.SenderStats{})
AddCheckStats(testCheck, 12345, nil, warnings, stats.SenderStats{}, haagentmock.NewMockHaAgent())
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion pkg/collector/worker/check_logger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (

"github.com/stretchr/testify/assert"

haagentmock "github.com/DataDog/datadog-agent/comp/haagent/mock"
"github.com/DataDog/datadog-agent/pkg/collector/check"
checkid "github.com/DataDog/datadog-agent/pkg/collector/check/id"
"github.com/DataDog/datadog-agent/pkg/collector/check/stats"
Expand All @@ -32,7 +33,7 @@ func newTestCheck(id string) *stubCheck {
}

func addExpvarsCheckStats(c check.Check) {
expvars.AddCheckStats(c, 0, nil, nil, stats.SenderStats{})
expvars.AddCheckStats(c, 0, nil, nil, stats.SenderStats{}, haagentmock.NewMockHaAgent())
}

func setUp() {
Expand Down
2 changes: 1 addition & 1 deletion pkg/collector/worker/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ func (w *Worker) Run() {
// otherwise only do so if the check is in the scheduler
if w.shouldAddCheckStatsFunc(check.ID()) {
sStats, _ := check.GetSenderStats()
expvars.AddCheckStats(check, time.Since(checkStartTime), checkErr, checkWarnings, sStats)
expvars.AddCheckStats(check, time.Since(checkStartTime), checkErr, checkWarnings, sStats, w.haAgent)
}
}

Expand Down
Loading