From 509b1cca592a2af0e1e80721d6090f70dc64bb65 Mon Sep 17 00:00:00 2001 From: VihasMakwana <121151420+VihasMakwana@users.noreply.github.com> Date: Wed, 31 Jul 2024 00:11:40 +0530 Subject: [PATCH] [tests][integration] fix monitoring test cases (#5208) * chore: fix monitoring test cases * fix: update long running tests * fix: change ExecStatus and return nil * fix: update docstring, non-empty * fix: reword * fix: nit * fix: empty line * fix: join errors. * temproray logs to examine CI * Revert "temproray logs to examine CI" remove logs as the test failure has been identified This reverts commit dc392b3c7fc71d564965d5ed7562cbc53a8a90a4. * fix: update condition --- pkg/testing/fixture.go | 20 +++++++------------ .../agent_long_running_leak_test.go | 15 +++++++++++--- ...monitoring_probe_preserve_text_cfg_test.go | 6 +++++- .../monitoring_probe_reload_test.go | 5 ++++- 4 files changed, 28 insertions(+), 18 deletions(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index 75da9f9c94a..c7338cc63d1 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -709,26 +709,20 @@ func (e *ExecErr) Unwrap() error { // ExecStatus executes the status subcommand on the prepared Elastic Agent binary. // It returns the parsed output and the error from the execution. Keep in mind // the agent exits with status 1 if it's unhealthy, but it still outputs the -// status successfully. Therefore, a non-empty AgentStatusOutput is valid -// regardless of the error. An empty AgentStatusOutput and non nil error -// means the output could not be parsed. Use AgentStatusOutput.IsZero() to -// determine if the returned AgentStatusOutput is empty or not. +// status successfully. An empty AgentStatusOutput and non nil error +// means the output could not be parsed. +// As long as we get some output, we don't return any error. // It should work with any 8.6+ agent func (f *Fixture) ExecStatus(ctx context.Context, opts ...process.CmdOption) (AgentStatusOutput, error) { out, err := f.Exec(ctx, []string{"status", "--output", "json"}, opts...) status := AgentStatusOutput{} if uerr := json.Unmarshal(out, &status); uerr != nil { return AgentStatusOutput{}, - fmt.Errorf("could not unmarshal agent status output: %w", - errors.Join(&ExecErr{ - err: err, - Output: out, - }, uerr)) + fmt.Errorf("could not unmarshal agent status output: %w", errors.Join(uerr, err)) + } else if status.IsZero() { + return status, fmt.Errorf("agent status output is empty: %w", err) } - if err != nil { - return status, fmt.Errorf("error running command (output: %s): %w", string(out), err) - } return status, nil } @@ -1243,7 +1237,7 @@ type AgentStatusOutput struct { } func (aso *AgentStatusOutput) IsZero() bool { - return aso.Info.ID == "" + return aso.Info.ID == "" && aso.Message == "" && aso.Info.Version == "" } type AgentInspectOutput struct { diff --git a/testing/integration/agent_long_running_leak_test.go b/testing/integration/agent_long_running_leak_test.go index 0d1a731d3f7..f3862665771 100644 --- a/testing/integration/agent_long_running_leak_test.go +++ b/testing/integration/agent_long_running_leak_test.go @@ -210,13 +210,16 @@ func (runner *ExtendedRunner) CheckHealthAtStartup(ctx context.Context) { require.Eventually(runner.T(), func() bool { allHealthy := true status, err := runner.agentFixture.ExecStatus(ctx) + if err != nil { + runner.T().Logf("agent status returned an error: %v", err) + return false + } apacheMatch := "logfile-apache" foundApache := false systemMatch := "system/metrics" foundSystem := false - require.NoError(runner.T(), err) for _, comp := range status.Components { // make sure the components include the expected integrations for _, v := range comp.Units { @@ -270,7 +273,10 @@ func (gm *goroutinesMonitor) Init(ctx context.Context, t *testing.T, fixture *at paths.SetTop("/opt/Elastic/Agent") // fetch the unit ID of the component, use that to generate the path to the unix socket status, err := fixture.ExecStatus(ctx) - require.NoError(t, err) + if err != nil { + t.Logf("agent status returned an error: %v", err) + } + for _, comp := range status.Components { unitId := comp.ID socketPath := utils.SocketURLWithFallback(unitId, paths.TempDir()) @@ -352,7 +358,10 @@ func (handleMon *handleMonitor) Init(ctx context.Context, t *testing.T, fixture // so separately fetch the PIDs pidInStatusMessageRegex := regexp.MustCompile(`[\d]+`) status, err := fixture.ExecStatus(ctx) - require.NoError(t, err) + if err != nil { + t.Logf("agent status returned an error: %v", err) + } + for _, comp := range status.Components { pidStr := pidInStatusMessageRegex.FindString(comp.Message) pid, err := strconv.ParseInt(pidStr, 10, 64) diff --git a/testing/integration/monitoring_probe_preserve_text_cfg_test.go b/testing/integration/monitoring_probe_preserve_text_cfg_test.go index a7af3c597f0..a4e76d6b075 100644 --- a/testing/integration/monitoring_probe_preserve_text_cfg_test.go +++ b/testing/integration/monitoring_probe_preserve_text_cfg_test.go @@ -54,6 +54,7 @@ inputs: - filesystem data_stream.dataset: system.filesystem agent.monitoring: + metrics_period: 1s http: enabled: true port: 6791 @@ -188,8 +189,11 @@ func (runner *MonitoringTextRunner) AllComponentsHealthy(ctx context.Context) { require.Eventually(runner.T(), func() bool { allHealthy := true status, err := runner.agentFixture.ExecStatus(ctx) + if err != nil { + runner.T().Logf("agent status returned an error: %v", err) + return false + } - require.NoError(runner.T(), err) for _, comp := range status.Components { runner.T().Logf("component state: %s", comp.Message) if comp.State != int(cproto.State_HEALTHY) { diff --git a/testing/integration/monitoring_probe_reload_test.go b/testing/integration/monitoring_probe_reload_test.go index bfb28f16c4f..e456be40c6d 100644 --- a/testing/integration/monitoring_probe_reload_test.go +++ b/testing/integration/monitoring_probe_reload_test.go @@ -166,8 +166,11 @@ func (runner *MonitoringRunner) AllComponentsHealthy(ctx context.Context) { require.Eventually(runner.T(), func() bool { allHealthy := true status, err := runner.agentFixture.ExecStatus(ctx) + if err != nil { + runner.T().Logf("agent status returned an error: %v", err) + return false + } - require.NoError(runner.T(), err) for _, comp := range status.Components { runner.T().Logf("component state: %s", comp.Message) if comp.State != int(cproto.State_HEALTHY) {