From 36e66888ce001c7ef95f93ac8a9e89279184574e Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Tue, 3 Dec 2024 19:27:15 -0500 Subject: [PATCH 01/20] enhancement(5832): added integration tests --- .../integration/restrict_upgrade_deb_test.go | 54 +++++++++++++++++++ .../integration/restrict_upgrade_rpm_test.go | 54 +++++++++++++++++++ 2 files changed, 108 insertions(+) create mode 100644 testing/integration/restrict_upgrade_deb_test.go create mode 100644 testing/integration/restrict_upgrade_rpm_test.go diff --git a/testing/integration/restrict_upgrade_deb_test.go b/testing/integration/restrict_upgrade_deb_test.go new file mode 100644 index 00000000000..d4589227b0b --- /dev/null +++ b/testing/integration/restrict_upgrade_deb_test.go @@ -0,0 +1,54 @@ +//go:build integration + +package integration + +import ( + "context" + "testing" + "time" + + "github.com/elastic/elastic-agent/internal/pkg/agent/cmd" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/stretchr/testify/require" +) + +func TestRestrictUpgradeDeb(t *testing.T) { + define.Require(t, define.Requirements{ + Group: Deb, + Stack: &define.Stack{}, + Sudo: true, + OS: []define.OS{ + { + Type: define.Linux, + Distro: "ubuntu", + }, + }, + }) + t.Run("when agent is deployed via deb, a user should not be able to upgrade the agent using the cli", func(t *testing.T) { + ctx := context.Background() + + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version(), atesting.WithPackageFormat("deb")) + require.NoError(t, err) + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Privileged: true, + Force: true, + } + + _, err = fixture.InstallWithoutEnroll(ctx, &installOpts) + require.NoError(t, err) + + require.Eventuallyf(t, func() bool { + err = fixture.IsHealthy(ctx) + return err == nil + }, 5*time.Minute, time.Second, + "Elastic-Agent did not report healthy. 
Agent status error: \"%v\"", + err, + ) + + out, err := fixture.Exec(ctx, []string{"upgrade", "1.0.0"}) + require.Error(t, err) + require.Contains(t, string(out), cmd.UpgradeDisabledError.Error()) + }) +} diff --git a/testing/integration/restrict_upgrade_rpm_test.go b/testing/integration/restrict_upgrade_rpm_test.go new file mode 100644 index 00000000000..f99d52f9e07 --- /dev/null +++ b/testing/integration/restrict_upgrade_rpm_test.go @@ -0,0 +1,54 @@ +//go:build integration + +package integration + +import ( + "context" + "testing" + "time" + + "github.com/elastic/elastic-agent/internal/pkg/agent/cmd" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/stretchr/testify/require" +) + +func TestRestrictUpgradeRPM(t *testing.T) { + define.Require(t, define.Requirements{ + Group: RPM, + Stack: &define.Stack{}, + Sudo: true, + OS: []define.OS{ + { + Type: define.Linux, + Distro: "rhel", + }, + }, + }) + t.Run("when agent is deployed via rpm, a user should not be able to upgrade the agent using the cli", func(t *testing.T) { + ctx := context.Background() + + fixture, err := define.NewFixtureFromLocalBuild(t, define.Version(), atesting.WithPackageFormat("rpm")) + require.NoError(t, err) + installOpts := atesting.InstallOpts{ + NonInteractive: true, + Privileged: true, + Force: true, + } + + _, err = fixture.InstallWithoutEnroll(ctx, &installOpts) + require.NoError(t, err) + + require.Eventuallyf(t, func() bool { + err = fixture.IsHealthy(ctx) + return err == nil + }, 5*time.Minute, time.Second, + "Elastic-Agent did not report healthy. 
Agent status error: \"%v\"", + err, + ) + + out, err := fixture.Exec(ctx, []string{"upgrade", "1.0.0"}) + require.Error(t, err) + require.Contains(t, string(out), cmd.UpgradeDisabledError.Error()) + }) +} From 1aa5a76ed514dc7346bbb53759ca0a1cd3af3d7a Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Tue, 3 Dec 2024 22:23:14 -0500 Subject: [PATCH 02/20] enhancement(5832): updated fixture install, updated assertions --- pkg/testing/fixture_install.go | 39 ++++++++++++++++--- .../integration/restrict_upgrade_deb_test.go | 4 +- .../integration/restrict_upgrade_rpm_test.go | 4 +- 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/pkg/testing/fixture_install.go b/pkg/testing/fixture_install.go index cf33122a93b..77b7d98a283 100644 --- a/pkg/testing/fixture_install.go +++ b/pkg/testing/fixture_install.go @@ -175,6 +175,14 @@ func (i *InstallOpts) ToCmdArgs() []string { // - the combined output of Install command stdout and stderr // - an error if any. func (f *Fixture) Install(ctx context.Context, installOpts *InstallOpts, opts ...process.CmdOption) ([]byte, error) { + return f.installFunc(ctx, installOpts, true, opts...) +} + +func (f *Fixture) InstallWithoutEnroll(ctx context.Context, installOpts *InstallOpts, opts ...process.CmdOption) ([]byte, error) { + return f.installFunc(ctx, installOpts, false, opts...) +} + +func (f *Fixture) installFunc(ctx context.Context, installOpts *InstallOpts, shouldEnroll bool, opts ...process.CmdOption) ([]byte, error) { f.t.Logf("[test %s] Inside fixture install function", f.t.Name()) // check for running agents before installing, but only if not installed into a namespace whose point is allowing two agents at once. @@ -184,11 +192,11 @@ func (f *Fixture) Install(ctx context.Context, installOpts *InstallOpts, opts .. 
switch f.packageFormat { case "targz", "zip": - return f.installNoPkgManager(ctx, installOpts, opts) + return f.installNoPkgManager(ctx, installOpts, shouldEnroll, opts) case "deb": - return f.installDeb(ctx, installOpts, opts) + return f.installDeb(ctx, installOpts, shouldEnroll, opts) case "rpm": - return f.installRpm(ctx, installOpts, opts) + return f.installRpm(ctx, installOpts, shouldEnroll, opts) default: return nil, fmt.Errorf("package format %s isn't supported yet", f.packageFormat) } @@ -202,14 +210,25 @@ func (f *Fixture) Install(ctx context.Context, installOpts *InstallOpts, opts .. // It returns: // - the combined output of Install command stdout and stderr // - an error if any. -func (f *Fixture) installNoPkgManager(ctx context.Context, installOpts *InstallOpts, opts []process.CmdOption) ([]byte, error) { +func (f *Fixture) installNoPkgManager(ctx context.Context, installOpts *InstallOpts, shouldEnroll bool, opts []process.CmdOption) ([]byte, error) { f.t.Logf("[test %s] Inside fixture installNoPkgManager function", f.t.Name()) if installOpts == nil { // default options when not provided installOpts = &InstallOpts{} } + // Removes install params to prevent enrollment + removeEnrollParams := func(installOpts *InstallOpts) { + installOpts.URL = "" + installOpts.EnrollmentToken = "" + installOpts.ESHost = "" + } + installArgs := []string{"install"} + if !shouldEnroll { + removeEnrollParams(installOpts) + } + installArgs = append(installArgs, installOpts.ToCmdArgs()...) out, err := f.Exec(ctx, installArgs, opts...) if err != nil { @@ -410,7 +429,7 @@ func getProcesses(t *gotesting.T, regex string) []runningProcess { // It returns: // - the combined output of Install command stdout and stderr // - an error if any. 
-func (f *Fixture) installDeb(ctx context.Context, installOpts *InstallOpts, opts []process.CmdOption) ([]byte, error) { +func (f *Fixture) installDeb(ctx context.Context, installOpts *InstallOpts, shouldEnroll bool, opts []process.CmdOption) ([]byte, error) { f.t.Logf("[test %s] Inside fixture installDeb function", f.t.Name()) // Prepare so that the f.srcPackage string is populated err := f.EnsurePrepared(ctx) @@ -456,6 +475,10 @@ func (f *Fixture) installDeb(ctx context.Context, installOpts *InstallOpts, opts return out, fmt.Errorf("systemctl start elastic-agent failed: %w", err) } + if !shouldEnroll { + return nil, nil + } + // apt install doesn't enroll, so need to do that enrollArgs := []string{"elastic-agent", "enroll"} if installOpts.Force { @@ -491,7 +514,7 @@ func (f *Fixture) installDeb(ctx context.Context, installOpts *InstallOpts, opts // It returns: // - the combined output of Install command stdout and stderr // - an error if any. -func (f *Fixture) installRpm(ctx context.Context, installOpts *InstallOpts, opts []process.CmdOption) ([]byte, error) { +func (f *Fixture) installRpm(ctx context.Context, installOpts *InstallOpts, shouldEnroll bool, opts []process.CmdOption) ([]byte, error) { f.t.Logf("[test %s] Inside fixture installRpm function", f.t.Name()) // Prepare so that the f.srcPackage string is populated err := f.EnsurePrepared(ctx) @@ -530,6 +553,10 @@ func (f *Fixture) installRpm(ctx context.Context, installOpts *InstallOpts, opts return out, fmt.Errorf("systemctl start elastic-agent failed: %w", err) } + if !shouldEnroll { + return nil, nil + } + // rpm install doesn't enroll, so need to do that enrollArgs := []string{"elastic-agent", "enroll"} if installOpts.Force { diff --git a/testing/integration/restrict_upgrade_deb_test.go b/testing/integration/restrict_upgrade_deb_test.go index d4589227b0b..8290a4d1563 100644 --- a/testing/integration/restrict_upgrade_deb_test.go +++ b/testing/integration/restrict_upgrade_deb_test.go @@ -7,7 +7,7 @@ 
import ( "testing" "time" - "github.com/elastic/elastic-agent/internal/pkg/agent/cmd" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator" atesting "github.com/elastic/elastic-agent/pkg/testing" "github.com/elastic/elastic-agent/pkg/testing/define" "github.com/stretchr/testify/require" @@ -49,6 +49,6 @@ func TestRestrictUpgradeDeb(t *testing.T) { out, err := fixture.Exec(ctx, []string{"upgrade", "1.0.0"}) require.Error(t, err) - require.Contains(t, string(out), cmd.UpgradeDisabledError.Error()) + require.Contains(t, string(out), coordinator.ErrNotUpgradable.Error()) }) } diff --git a/testing/integration/restrict_upgrade_rpm_test.go b/testing/integration/restrict_upgrade_rpm_test.go index f99d52f9e07..293acb08efe 100644 --- a/testing/integration/restrict_upgrade_rpm_test.go +++ b/testing/integration/restrict_upgrade_rpm_test.go @@ -7,7 +7,7 @@ import ( "testing" "time" - "github.com/elastic/elastic-agent/internal/pkg/agent/cmd" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator" atesting "github.com/elastic/elastic-agent/pkg/testing" "github.com/elastic/elastic-agent/pkg/testing/define" "github.com/stretchr/testify/require" @@ -49,6 +49,6 @@ func TestRestrictUpgradeRPM(t *testing.T) { out, err := fixture.Exec(ctx, []string{"upgrade", "1.0.0"}) require.Error(t, err) - require.Contains(t, string(out), cmd.UpgradeDisabledError.Error()) + require.Contains(t, string(out), coordinator.ErrNotUpgradable.Error()) }) } From a0ad3b7ec3aba63c2380e01ad523a4c54d76cdbd Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Wed, 4 Dec 2024 08:38:25 -0500 Subject: [PATCH 03/20] enhancement(5832): added kubernetes test --- .../kubernetes_agent_standalone_test.go | 210 ++++++++++++++++++ 1 file changed, 210 insertions(+) diff --git a/testing/integration/kubernetes_agent_standalone_test.go b/testing/integration/kubernetes_agent_standalone_test.go index f3158b29644..149265d334a 100644 --- 
a/testing/integration/kubernetes_agent_standalone_test.go +++ b/testing/integration/kubernetes_agent_standalone_test.go @@ -48,6 +48,7 @@ import ( "helm.sh/helm/v3/pkg/cli" helmKube "helm.sh/helm/v3/pkg/kube" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator" aclient "github.com/elastic/elastic-agent/pkg/control/v2/client" atesting "github.com/elastic/elastic-agent/pkg/testing" "github.com/elastic/elastic-agent/pkg/testing/define" @@ -479,6 +480,215 @@ func TestKubernetesAgentHelm(t *testing.T) { } } +func TestRestrictCliUpgrade(t *testing.T) { + info := define.Require(t, define.Requirements{ + Stack: &define.Stack{}, + Local: false, + Sudo: false, + OS: []define.OS{ + {Type: define.Kubernetes, DockerVariant: "basic"}, + {Type: define.Kubernetes, DockerVariant: "wolfi"}, + {Type: define.Kubernetes, DockerVariant: "ubi"}, + {Type: define.Kubernetes, DockerVariant: "complete"}, + {Type: define.Kubernetes, DockerVariant: "complete-wolfi"}, + }, + Group: define.Kubernetes, + }) + + agentImage := os.Getenv("AGENT_IMAGE") + require.NotEmpty(t, agentImage, "AGENT_IMAGE must be set") + + agentImageParts := strings.SplitN(agentImage, ":", 2) + require.Len(t, agentImageParts, 2, "AGENT_IMAGE must be in the form ':'") + agentImageRepo := agentImageParts[0] + agentImageTag := agentImageParts[1] + + client, err := info.KubeClient() + require.NoError(t, err) + require.NotNil(t, client) + + testLogsBasePath := os.Getenv("K8S_TESTS_POD_LOGS_BASE") + require.NotEmpty(t, testLogsBasePath, "K8S_TESTS_POD_LOGS_BASE must be set") + + err = os.MkdirAll(filepath.Join(testLogsBasePath, t.Name()), 0755) + require.NoError(t, err, "failed to create test logs directory") + + namespace := info.Namespace + + esHost := os.Getenv("ELASTICSEARCH_HOST") + require.NotEmpty(t, esHost, "ELASTICSEARCH_HOST must be set") + + esAPIKey, err := generateESAPIKey(info.ESClient, namespace) + require.NoError(t, err, "failed to generate ES API key") + require.NotEmpty(t, esAPIKey, 
"failed to generate ES API key") + + require.NoError(t, err, "failed to create fleet enroll params") + + testCases := []struct { + name string + values map[string]any + atLeastValidatedPodsNumber int + runK8SInnerTests bool + }{ + { + name: "helm standalone agent default kubernetes privileged", + values: map[string]any{ + "kubernetes": map[string]any{ + "enabled": true, + }, + "agent": map[string]any{ + "unprivileged": false, + "image": map[string]any{ + "repository": agentImageRepo, + "tag": agentImageTag, + "pullPolicy": "Never", + }, + }, + "outputs": map[string]any{ + "default": map[string]any{ + "type": "ESPlainAuthAPI", + "url": esHost, + "api_key": esAPIKey, + }, + }, + }, + runK8SInnerTests: true, + // - perNode Daemonset (at least 1 agent pod) + // - clusterWide Deployment (1 agent pod) + // - ksmSharded Statefulset (1 agent pod) + atLeastValidatedPodsNumber: 3, + }, + { + name: "helm standalone agent default kubernetes unprivileged", + values: map[string]any{ + "kubernetes": map[string]any{ + "enabled": true, + }, + "agent": map[string]any{ + "unprivileged": true, + "image": map[string]any{ + "repository": agentImageRepo, + "tag": agentImageTag, + "pullPolicy": "Never", + }, + }, + "outputs": map[string]any{ + "default": map[string]any{ + "type": "ESPlainAuthAPI", + "url": esHost, + "api_key": esAPIKey, + }, + }, + }, + runK8SInnerTests: true, + // - perNode Daemonset (at least 1 agent pod) + // - clusterWide Deployment (1 agent pod) + // - ksmSharded Statefulset (1 agent pod) + atLeastValidatedPodsNumber: 3, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + hasher := sha256.New() + hasher.Write([]byte(tc.name)) + testNamespace := strings.ToLower(base64.URLEncoding.EncodeToString(hasher.Sum(nil))) + testNamespace = noSpecialCharsRegexp.ReplaceAllString(testNamespace, "") + + settings := cli.New() + settings.SetNamespace(testNamespace) + actionConfig := &action.Configuration{} + + helmChart, 
err := loader.Load(agentK8SHelm) + require.NoError(t, err, "failed to load helm chart") + + err = actionConfig.Init(settings.RESTClientGetter(), settings.Namespace(), "", + func(format string, v ...interface{}) {}) + require.NoError(t, err, "failed to init helm action config") + + helmValues := tc.values + + t.Cleanup(func() { + if t.Failed() { + k8sDumpAllPodLogs(ctx, client, testNamespace, testNamespace, testLogsBasePath) + } + + uninstallAction := action.NewUninstall(actionConfig) + uninstallAction.Wait = true + + _, err = uninstallAction.Run("helm-agent") + if err != nil { + require.NoError(t, err, "failed to uninstall helm chart") + } + }) + + installAction := action.NewInstall(actionConfig) + installAction.Namespace = testNamespace + installAction.CreateNamespace = true + installAction.UseReleaseName = true + installAction.ReleaseName = "helm-agent" + installAction.Timeout = 2 * time.Minute + installAction.Wait = true + installAction.WaitForJobs = true + _, err = installAction.Run(helmChart, helmValues) + require.NoError(t, err, "failed to install helm chart") + + podList := &corev1.PodList{} + err = client.Resources(testNamespace).List(ctx, podList) + require.NoError(t, err, fmt.Sprintf("failed to list pods in namespace %s", testNamespace)) + + checkedAgentContainers := 0 + + for _, pod := range podList.Items { + if !strings.HasPrefix(pod.GetName(), "agent-") { + continue + } + + command := []string{"elastic-agent", "status"} + var stdout, stderr bytes.Buffer + var agentHealthyErr error + // we will wait maximum 120 seconds for the agent to report healthy + for i := 0; i < 120; i++ { + stdout.Reset() + stderr.Reset() + agentHealthyErr = client.Resources().ExecInPod(ctx, testNamespace, pod.Name, "agent", command, &stdout, &stderr) + if agentHealthyErr == nil { + break + } + time.Sleep(time.Second * 1) + } + + statusString := stdout.String() + if agentHealthyErr != nil { + t.Errorf("elastic-agent never reported healthy: %v", agentHealthyErr) + t.Logf("stdout: 
%s\n", statusString) + t.Logf("stderr: %s\n", stderr.String()) + t.FailNow() + return + } + + stdout.Reset() + stderr.Reset() + + upgradeCmd := []string{"elastic-agent", "upgrade", "1.0.0"} + upgradeCmdError := client.Resources().ExecInPod(ctx, testNamespace, pod.Name, "agent", upgradeCmd, &stdout, &stderr) + + errOut := stderr.String() + require.Error(t, upgradeCmdError) + require.Contains(t, errOut, coordinator.ErrNotUpgradable.Error()) + + stderr.Reset() + + checkedAgentContainers++ + } + + require.GreaterOrEqual(t, checkedAgentContainers, tc.atLeastValidatedPodsNumber, + fmt.Sprintf("at least %d agent containers should be checked", tc.atLeastValidatedPodsNumber)) + }) + } +} + // k8sCheckAgentStatus checks that the agent reports healthy. func k8sCheckAgentStatus(ctx context.Context, client klient.Client, stdout *bytes.Buffer, stderr *bytes.Buffer, namespace string, agentPodName string, containerName string, componentPresence map[string]bool, From 65db4b66e45a7801212d32dc2241d7c939cd6aad Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Wed, 4 Dec 2024 08:56:35 -0500 Subject: [PATCH 04/20] enhancement(5832): ran mage update --- testing/integration/restrict_upgrade_deb_test.go | 7 ++++++- testing/integration/restrict_upgrade_rpm_test.go | 7 ++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/testing/integration/restrict_upgrade_deb_test.go b/testing/integration/restrict_upgrade_deb_test.go index 8290a4d1563..80ee581eef3 100644 --- a/testing/integration/restrict_upgrade_deb_test.go +++ b/testing/integration/restrict_upgrade_deb_test.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. 
+ //go:build integration package integration @@ -7,10 +11,11 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator" atesting "github.com/elastic/elastic-agent/pkg/testing" "github.com/elastic/elastic-agent/pkg/testing/define" - "github.com/stretchr/testify/require" ) func TestRestrictUpgradeDeb(t *testing.T) { diff --git a/testing/integration/restrict_upgrade_rpm_test.go b/testing/integration/restrict_upgrade_rpm_test.go index 293acb08efe..5b1d8e31607 100644 --- a/testing/integration/restrict_upgrade_rpm_test.go +++ b/testing/integration/restrict_upgrade_rpm_test.go @@ -1,3 +1,7 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License 2.0; +// you may not use this file except in compliance with the Elastic License 2.0. + //go:build integration package integration @@ -7,10 +11,11 @@ import ( "testing" "time" + "github.com/stretchr/testify/require" + "github.com/elastic/elastic-agent/internal/pkg/agent/application/coordinator" atesting "github.com/elastic/elastic-agent/pkg/testing" "github.com/elastic/elastic-agent/pkg/testing/define" - "github.com/stretchr/testify/require" ) func TestRestrictUpgradeRPM(t *testing.T) { From 2a096fe00af47eced20e2d1e265cf06681020d34 Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Thu, 5 Dec 2024 14:05:39 -0500 Subject: [PATCH 05/20] enhancement(5832): execute rpm test in default group --- testing/integration/restrict_upgrade_rpm_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/integration/restrict_upgrade_rpm_test.go b/testing/integration/restrict_upgrade_rpm_test.go index 5b1d8e31607..9de1a7d709f 100644 --- a/testing/integration/restrict_upgrade_rpm_test.go +++ b/testing/integration/restrict_upgrade_rpm_test.go @@ -20,7 +20,7 @@ import ( func TestRestrictUpgradeRPM(t *testing.T) { define.Require(t, 
define.Requirements{ - Group: RPM, + Group: Default, Stack: &define.Stack{}, Sudo: true, OS: []define.OS{ From 519a353ed552e800a9234fecbc6443891b06659c Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Thu, 5 Dec 2024 14:54:27 -0500 Subject: [PATCH 06/20] Revert "enhancement(5832): execute rpm test in default group" This reverts commit fa93a8ee22f3ab11ba5aab0c92c7a717ab5e04a8. --- testing/integration/restrict_upgrade_rpm_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/integration/restrict_upgrade_rpm_test.go b/testing/integration/restrict_upgrade_rpm_test.go index 9de1a7d709f..5b1d8e31607 100644 --- a/testing/integration/restrict_upgrade_rpm_test.go +++ b/testing/integration/restrict_upgrade_rpm_test.go @@ -20,7 +20,7 @@ import ( func TestRestrictUpgradeRPM(t *testing.T) { define.Require(t, define.Requirements{ - Group: Default, + Group: RPM, Stack: &define.Stack{}, Sudo: true, OS: []define.OS{ From e5c26fc456b341f30cc495f153899e8bfdbd4b5e Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Fri, 13 Dec 2024 23:45:02 -0500 Subject: [PATCH 07/20] enhancement(5832): debugging ci issues --- .buildkite/scripts/steps/integration_tests.sh | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/.buildkite/scripts/steps/integration_tests.sh b/.buildkite/scripts/steps/integration_tests.sh index 834da1cd4c6..d58503f07cb 100755 --- a/.buildkite/scripts/steps/integration_tests.sh +++ b/.buildkite/scripts/steps/integration_tests.sh @@ -7,19 +7,18 @@ STACK_PROVISIONER="${1:-"stateful"}" MAGE_TARGET="${2:-"integration:test"}" MAGE_SUBTARGET="${3:-""}" - # Override the stack version from `.package-version` contents # There is a time when the current snapshot is not available on cloud yet, so we cannot use the latest version automatically # This file is managed by an automation (mage integration:UpdateAgentPackageVersion) that check if the snapshot is ready. 
STACK_VERSION="$(cat .package-version)" if [[ -n "$STACK_VERSION" ]]; then - STACK_VERSION=${STACK_VERSION}"-SNAPSHOT" + STACK_VERSION=${STACK_VERSION}"-SNAPSHOT" fi # Run integration tests set +e -AGENT_STACK_VERSION="${STACK_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true STACK_PROVISIONER="$STACK_PROVISIONER" SNAPSHOT=true mage $MAGE_TARGET $MAGE_SUBTARGET +AGENT_STACK_VERSION="${STACK_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true TEST_PACKAGES="rpm" TEST_GROUPS="rpm" STACK_PROVISIONER="$STACK_PROVISIONER" SNAPSHOT=true mage $MAGE_TARGET $MAGE_SUBTARGET TESTS_EXIT_STATUS=$? set -e @@ -27,10 +26,10 @@ set -e outputXML="build/TEST-go-integration.xml" if [ -f "$outputXML" ]; then - go install github.com/alexec/junit2html@latest - junit2html < "$outputXML" > build/TEST-report.html + go install github.com/alexec/junit2html@latest + junit2html <"$outputXML" >build/TEST-report.html else - echo "Cannot generate HTML test report: $outputXML not found" + echo "Cannot generate HTML test report: $outputXML not found" fi exit $TESTS_EXIT_STATUS From a10335b6c9fad4ed95cf5bd548f5669a2198729d Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Mon, 16 Dec 2024 14:33:33 -0500 Subject: [PATCH 08/20] enahancement(5832): added logs to see if any other agent is running, commented out other integration tests --- .buildkite/bk.integration.pipeline.yml | 230 ++++++++++++------------- .buildkite/integration.pipeline.yml | 120 ++++++------- pkg/testing/fixture_install.go | 2 + 3 files changed, 177 insertions(+), 175 deletions(-) diff --git a/.buildkite/bk.integration.pipeline.yml b/.buildkite/bk.integration.pipeline.yml index 6d61b8743fb..557ad80b6f0 100644 --- a/.buildkite/bk.integration.pipeline.yml +++ b/.buildkite/bk.integration.pipeline.yml @@ -21,122 +21,122 @@ steps: image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-beats-ci-with-hooks:0.5" useCustomGlobalHooks: true - - group: "Stateful: Windows" - key: integration-tests-win - depends_on: - - integration-ess - steps: - - 
label: "Win2022:sudo:{{matrix}}" - command: | - buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - .buildkite/scripts/integration-tests.ps1 {{matrix}} true - artifact_paths: - - build/** - - build/diagnostics/** - agents: - provider: "gcp" - machineType: "n1-standard-8" - image: "family/platform-ingest-elastic-agent-windows-2022" - matrix: - - default - - fleet - - fleet-privileged - - upgrade - - - label: "Win2022:non-sudo:{{matrix}}" - command: | - buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - .buildkite/scripts/integration-tests.ps1 {{matrix}} false - artifact_paths: - - build/** - - build/diagnostics/** - agents: - provider: "gcp" - machineType: "n1-standard-8" - image: "family/platform-ingest-elastic-agent-windows-2022" - matrix: - - default - - - group: "Stateful:Ubuntu" - key: integration-tests-ubuntu - depends_on: - - integration-ess - steps: - - label: "x86_64:non-sudo: {{matrix}}" - command: | - buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} false - artifact_paths: - - build/** - - build/diagnostics/** - agents: - provider: "gcp" - machineType: "n1-standard-8" - image: "family/platform-ingest-elastic-agent-ubuntu-2404" - matrix: - - default - - - label: "x86_64:sudo: {{matrix}}" - command: | - buildkite-agent artifact download build/distributions/** . 
--step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} true - artifact_paths: - - build/** - - build/diagnostics/** - agents: - provider: "gcp" - machineType: "n1-standard-8" - image: "family/platform-ingest-elastic-agent-ubuntu-2404" - matrix: - - default - - container - - fleet-upgrade-to-pr-build - - upgrade - - fleet - - fqdn - - deb - - fleet-airgapped - - fleet-privileged - - fleet-airgapped-privileged + # - group: "Stateful: Windows" + # key: integration-tests-win + # depends_on: + # - integration-ess + # steps: + # - label: "Win2022:sudo:{{matrix}}" + # command: | + # buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + # .buildkite/scripts/integration-tests.ps1 {{matrix}} true + # artifact_paths: + # - build/** + # - build/diagnostics/** + # agents: + # provider: "gcp" + # machineType: "n1-standard-8" + # image: "family/platform-ingest-elastic-agent-windows-2022" + # matrix: + # - default + # - fleet + # - fleet-privileged + # - upgrade + # + # - label: "Win2022:non-sudo:{{matrix}}" + # command: | + # buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + # .buildkite/scripts/integration-tests.ps1 {{matrix}} false + # artifact_paths: + # - build/** + # - build/diagnostics/** + # agents: + # provider: "gcp" + # machineType: "n1-standard-8" + # image: "family/platform-ingest-elastic-agent-windows-2022" + # matrix: + # - default - - label: "arm:sudo: {{matrix}}" - skip: true - command: | - buildkite-agent artifact download build/distributions/** . 
--step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} true - artifact_paths: - - build/** - - build/diagnostics/** - agents: - provider: "aws" - imagePrefix: "platform-ingest-beats-ubuntu-2404-aarch64" - instanceType: "m6g.2xlarge" - matrix: - - default - - container - - fleet-upgrade-to-pr-build - - upgrade - - fleet - - fqdn - - deb - - fleet-airgapped - - fleet-privileged - - fleet-airgapped-privileged - - - label: "arm:non-sudo: {{matrix}}" - skip: true - command: | - buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} false - artifact_paths: - - build/** - - build/diagnostics/** - agents: - provider: "aws" - imagePrefix: "platform-ingest-beats-ubuntu-2404-aarch64" - instanceType: "m6g.xlarge" - matrix: - - default + # - group: "Stateful:Ubuntu" + # key: integration-tests-ubuntu + # depends_on: + # - integration-ess + # steps: + # - label: "x86_64:non-sudo: {{matrix}}" + # command: | + # buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + # .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} false + # artifact_paths: + # - build/** + # - build/diagnostics/** + # agents: + # provider: "gcp" + # machineType: "n1-standard-8" + # image: "family/platform-ingest-elastic-agent-ubuntu-2404" + # matrix: + # - default + # + # - label: "x86_64:sudo: {{matrix}}" + # command: | + # buildkite-agent artifact download build/distributions/** . 
--step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + # .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} true + # artifact_paths: + # - build/** + # - build/diagnostics/** + # agents: + # provider: "gcp" + # machineType: "n1-standard-8" + # image: "family/platform-ingest-elastic-agent-ubuntu-2404" + # matrix: + # - default + # - container + # - fleet-upgrade-to-pr-build + # - upgrade + # - fleet + # - fqdn + # - deb + # - fleet-airgapped + # - fleet-privileged + # - fleet-airgapped-privileged + # + # - label: "arm:sudo: {{matrix}}" + # skip: true + # command: | + # buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + # .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} true + # artifact_paths: + # - build/** + # - build/diagnostics/** + # agents: + # provider: "aws" + # imagePrefix: "platform-ingest-beats-ubuntu-2404-aarch64" + # instanceType: "m6g.2xlarge" + # matrix: + # - default + # - container + # - fleet-upgrade-to-pr-build + # - upgrade + # - fleet + # - fqdn + # - deb + # - fleet-airgapped + # - fleet-privileged + # - fleet-airgapped-privileged + # + # - label: "arm:non-sudo: {{matrix}}" + # skip: true + # command: | + # buildkite-agent artifact download build/distributions/** . 
--step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + # .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} false + # artifact_paths: + # - build/** + # - build/diagnostics/** + # agents: + # provider: "aws" + # imagePrefix: "platform-ingest-beats-ubuntu-2404-aarch64" + # instanceType: "m6g.xlarge" + # matrix: + # - default - group: "Stateful(Sudo):RHEL8" key: integration-tests-rhel8 diff --git a/.buildkite/integration.pipeline.yml b/.buildkite/integration.pipeline.yml index 3095024feff..14c6f714bfc 100644 --- a/.buildkite/integration.pipeline.yml +++ b/.buildkite/integration.pipeline.yml @@ -14,48 +14,48 @@ steps: provider: "gcp" machineType: "n1-standard-8" - - label: "Serverless integration test" - key: "serverless-integration-tests" - depends_on: - - package-it - concurrency_group: elastic-agent-extended-testing/serverless-integration - concurrency: 8 - env: - # we run each step in a different data center to spread the load - TEST_INTEG_AUTH_GCP_DATACENTER: "us-central1-a" - command: | - buildkite-agent artifact download build/distributions/** . --step 'package-it' - .buildkite/scripts/steps/integration_tests.sh serverless integration:single TestLogIngestionFleetManaged #right now, run a single test in serverless mode as a sort of smoke test, instead of re-running the entire suite - artifact_paths: - - "build/TEST-**" - - "build/diagnostics/*" - agents: - provider: "gcp" - machineType: "n1-standard-8" - notify: - - github_commit_status: - context: "buildkite/elastic-agent-extended-testing - Serverless integration test" + # - label: "Serverless integration test" + # key: "serverless-integration-tests" + # depends_on: + # - package-it + # concurrency_group: elastic-agent-extended-testing/serverless-integration + # concurrency: 8 + # env: + # # we run each step in a different data center to spread the load + # TEST_INTEG_AUTH_GCP_DATACENTER: "us-central1-a" + # command: | + # buildkite-agent artifact download build/distributions/** . 
--step 'package-it' + # .buildkite/scripts/steps/integration_tests.sh serverless integration:single TestLogIngestionFleetManaged #right now, run a single test in serverless mode as a sort of smoke test, instead of re-running the entire suite + # artifact_paths: + # - "build/TEST-**" + # - "build/diagnostics/*" + # agents: + # provider: "gcp" + # machineType: "n1-standard-8" + # notify: + # - github_commit_status: + # context: "buildkite/elastic-agent-extended-testing - Serverless integration test" - - label: "Extended runtime leak tests" - key: "extended-integration-tests" - depends_on: - - package-it - concurrency_group: elastic-agent-extended-testing/leak-tests - concurrency: 8 - env: - TEST_INTEG_AUTH_GCP_DATACENTER: "us-central1-b" - command: | - buildkite-agent artifact download build/distributions/** . --step 'package-it' - .buildkite/scripts/steps/integration_tests.sh stateful integration:TestForResourceLeaks - artifact_paths: - - "build/TEST-**" - - "build/diagnostics/*" - agents: - provider: "gcp" - machineType: "n1-standard-8" - notify: - - github_commit_status: - context: "buildkite/elastic-agent-extended-testing - Extended runtime leak tests" + # - label: "Extended runtime leak tests" + # key: "extended-integration-tests" + # depends_on: + # - package-it + # concurrency_group: elastic-agent-extended-testing/leak-tests + # concurrency: 8 + # env: + # TEST_INTEG_AUTH_GCP_DATACENTER: "us-central1-b" + # command: | + # buildkite-agent artifact download build/distributions/** . 
--step 'package-it' + # .buildkite/scripts/steps/integration_tests.sh stateful integration:TestForResourceLeaks + # artifact_paths: + # - "build/TEST-**" + # - "build/diagnostics/*" + # agents: + # provider: "gcp" + # machineType: "n1-standard-8" + # notify: + # - github_commit_status: + # context: "buildkite/elastic-agent-extended-testing - Extended runtime leak tests" - label: "Triggering Integration tests" depends_on: @@ -83,22 +83,22 @@ steps: - github_commit_status: context: "buildkite/elastic-agent-extended-testing - Serverless Beats Tests" - - label: "Kubernetes Integration tests" - key: "k8s-integration-tests" - env: - K8S_VERSION: "v1.31.0" - KIND_VERSION: "v0.24.0" - command: ".buildkite/scripts/steps/k8s-extended-tests.sh" - artifact_paths: - - "build/k8s-logs*/*" - - "build/k8s-logs*/**/*" - - "build/TEST-**" - - "build/diagnostics/*" - agents: - provider: "gcp" - machineType: "c2-standard-16" - image: "family/core-ubuntu-2204" - diskSizeGb: 400 - notify: - - github_commit_status: - context: "buildkite/elastic-agent-extended-testing - Kubernetes Integration tests" + # - label: "Kubernetes Integration tests" + # key: "k8s-integration-tests" + # env: + # K8S_VERSION: "v1.31.0" + # KIND_VERSION: "v0.24.0" + # command: ".buildkite/scripts/steps/k8s-extended-tests.sh" + # artifact_paths: + # - "build/k8s-logs*/*" + # - "build/k8s-logs*/**/*" + # - "build/TEST-**" + # - "build/diagnostics/*" + # agents: + # provider: "gcp" + # machineType: "c2-standard-16" + # image: "family/core-ubuntu-2204" + # diskSizeGb: 400 + # notify: + # - github_commit_status: + # context: "buildkite/elastic-agent-extended-testing - Kubernetes Integration tests" diff --git a/pkg/testing/fixture_install.go b/pkg/testing/fixture_install.go index 77b7d98a283..7ed53716932 100644 --- a/pkg/testing/fixture_install.go +++ b/pkg/testing/fixture_install.go @@ -187,6 +187,7 @@ func (f *Fixture) installFunc(ctx context.Context, installOpts *InstallOpts, sho // check for running agents before 
installing, but only if not installed into a namespace whose point is allowing two agents at once. if installOpts != nil && !installOpts.Develop && installOpts.Namespace == "" { + fmt.Println("TESTING IF ANY OTHER AGENTS ARE RUNNING ", getElasticAgentProcesses(f.t)) assert.Empty(f.t, getElasticAgentProcesses(f.t), "there should be no running agent at beginning of Install()") } @@ -553,6 +554,7 @@ func (f *Fixture) installRpm(ctx context.Context, installOpts *InstallOpts, shou return out, fmt.Errorf("systemctl start elastic-agent failed: %w", err) } + fmt.Println("SHOULD ENROLL ", shouldEnroll) if !shouldEnroll { return nil, nil } From 865708399dcff82656c46d2c7c250223be8fc766 Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Mon, 16 Dec 2024 20:20:32 -0500 Subject: [PATCH 09/20] enhancement(5832): added cleanup steps to rpm tests --- pkg/testing/fixture_install.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/pkg/testing/fixture_install.go b/pkg/testing/fixture_install.go index 7ed53716932..22280d654ac 100644 --- a/pkg/testing/fixture_install.go +++ b/pkg/testing/fixture_install.go @@ -529,8 +529,32 @@ func (f *Fixture) installRpm(ctx context.Context, installOpts *InstallOpts, shou return out, fmt.Errorf("rpm install failed: %w output:%s", err, string(out)) } + f.t.Cleanup(func() { + if f.t.Failed() { + f.DumpProcesses("-cleanup") + } + }) + + f.t.Cleanup(func() { + assert.Empty(f.t, getElasticAgentProcesses(f.t), "there should be no running agents left after running RPM tests") + }) + f.t.Cleanup(func() { f.t.Logf("[test %s] Inside fixture installRpm cleanup function", f.t.Name()) + + // diagnostics is collected when either the environment variable + // AGENT_COLLECT_DIAG=true or the test is marked failed + collect := collectDiagFlag() + failed := f.t.Failed() + if collect || failed { + if collect { + f.t.Logf("collecting diagnostics; AGENT_COLLECT_DIAG=true") + } else if failed { + f.t.Logf("collecting diagnostics; test failed") + } + 
f.collectDiagnostics() + } + uninstallCtx, uninstallCancel := context.WithTimeout(context.Background(), 5*time.Minute) defer uninstallCancel() // stop elastic-agent, non fatal if error, might have been stopped before this. From 1ef1a551f533e4ba0272ab66ff26241b49ca30fe Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Tue, 17 Dec 2024 11:33:18 -0500 Subject: [PATCH 10/20] enhancement(5832): trying 777 permission --- pkg/testing/fixture.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index 89968451747..e9e363be6c8 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -341,7 +341,6 @@ func (f *Fixture) RunBeat(ctx context.Context) error { process.WithContext(ctx), process.WithArgs(args), process.WithCmdOptions(attachOutErr(stdOut, stdErr))) - if err != nil { return fmt.Errorf("failed to spawn %s: %w", f.binaryName, err) } @@ -396,7 +395,8 @@ func RunProcess(t *testing.T, lp Logger, ctx context.Context, runLength time.Duration, logOutput, allowErrs bool, - processPath string, args ...string) error { + processPath string, args ...string, +) error { if _, deadlineSet := ctx.Deadline(); !deadlineSet { t.Fatal("Context passed to RunProcess() has no deadline set.") } @@ -414,7 +414,6 @@ func RunProcess(t *testing.T, process.WithContext(ctx), process.WithArgs(args), process.WithCmdOptions(attachOutErr(stdOut, stdErr))) - if err != nil { return fmt.Errorf("failed to spawn %q: %w", processPath, err) } @@ -544,7 +543,6 @@ func (f *Fixture) executeWithClient(ctx context.Context, command string, disable process.WithContext(ctx), process.WithArgs(args), process.WithCmdOptions(attachOutErr(stdOut, stdErr))) - if err != nil { return fmt.Errorf("failed to spawn %s: %w", f.binaryName, err) } @@ -1031,7 +1029,7 @@ func (f *Fixture) DumpProcesses(suffix string) { } f.t.Logf("Dumping running processes in %s", filePath) - file, err := os.OpenFile(filePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644) + 
file, err := os.OpenFile(filePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0777) if err != nil { f.t.Logf("failed to dump process; failed to create output file %s root: %s", filePath, err) return From 8b0f728c5538f992d31aeb0f36d43a81a818e780 Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Wed, 18 Dec 2024 14:51:22 -0500 Subject: [PATCH 11/20] enhancement(5832): removed diagnostics, added rpm cleanup step that purges all agent files --- pkg/testing/fixture_install.go | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/pkg/testing/fixture_install.go b/pkg/testing/fixture_install.go index 22280d654ac..289e6356d7d 100644 --- a/pkg/testing/fixture_install.go +++ b/pkg/testing/fixture_install.go @@ -529,32 +529,9 @@ func (f *Fixture) installRpm(ctx context.Context, installOpts *InstallOpts, shou return out, fmt.Errorf("rpm install failed: %w output:%s", err, string(out)) } - f.t.Cleanup(func() { - if f.t.Failed() { - f.DumpProcesses("-cleanup") - } - }) - - f.t.Cleanup(func() { - assert.Empty(f.t, getElasticAgentProcesses(f.t), "there should be no running agents left after running RPM tests") - }) - f.t.Cleanup(func() { f.t.Logf("[test %s] Inside fixture installRpm cleanup function", f.t.Name()) - // diagnostics is collected when either the environment variable - // AGENT_COLLECT_DIAG=true or the test is marked failed - collect := collectDiagFlag() - failed := f.t.Failed() - if collect || failed { - if collect { - f.t.Logf("collecting diagnostics; AGENT_COLLECT_DIAG=true") - } else if failed { - f.t.Logf("collecting diagnostics; test failed") - } - f.collectDiagnostics() - } - uninstallCtx, uninstallCancel := context.WithTimeout(context.Background(), 5*time.Minute) defer uninstallCancel() // stop elastic-agent, non fatal if error, might have been stopped before this. 
@@ -570,6 +547,13 @@ func (f *Fixture) installRpm(ctx context.Context, installOpts *InstallOpts, shou f.t.Logf("failed to 'sudo rpm -e elastic-agent': %s, output: %s", err, string(out)) f.t.FailNow() } + + f.t.Logf("removing installed agent files") + out, err = exec.CommandContext(uninstallCtx, "sudo", "rm", "-rf", "/var/lib/elastic-agent", "/var/log/elastic-agent", "/etc/elastic-agent").CombinedOutput() + if err != nil { + f.t.Logf("failed to 'sudo rm -rf /var/lib/elastic-agent /var/log/elastic-agent/ /etc/elastic-agent'") + f.t.FailNow() + } }) // start elastic-agent @@ -578,7 +562,6 @@ func (f *Fixture) installRpm(ctx context.Context, installOpts *InstallOpts, shou return out, fmt.Errorf("systemctl start elastic-agent failed: %w", err) } - fmt.Println("SHOULD ENROLL ", shouldEnroll) if !shouldEnroll { return nil, nil } From 973b01d3f4d099e9d2096cd6b74f7e4cf18be178 Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Wed, 18 Dec 2024 22:21:46 -0500 Subject: [PATCH 12/20] enhancement(5832): running all tests --- .buildkite/bk.integration.pipeline.yml | 230 +++++++++--------- .buildkite/scripts/steps/integration_tests.sh | 2 +- 2 files changed, 116 insertions(+), 116 deletions(-) diff --git a/.buildkite/bk.integration.pipeline.yml b/.buildkite/bk.integration.pipeline.yml index 557ad80b6f0..6d61b8743fb 100644 --- a/.buildkite/bk.integration.pipeline.yml +++ b/.buildkite/bk.integration.pipeline.yml @@ -21,122 +21,122 @@ steps: image: "docker.elastic.co/ci-agent-images/platform-ingest/buildkite-agent-beats-ci-with-hooks:0.5" useCustomGlobalHooks: true - # - group: "Stateful: Windows" - # key: integration-tests-win - # depends_on: - # - integration-ess - # steps: - # - label: "Win2022:sudo:{{matrix}}" - # command: | - # buildkite-agent artifact download build/distributions/** . 
--step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - # .buildkite/scripts/integration-tests.ps1 {{matrix}} true - # artifact_paths: - # - build/** - # - build/diagnostics/** - # agents: - # provider: "gcp" - # machineType: "n1-standard-8" - # image: "family/platform-ingest-elastic-agent-windows-2022" - # matrix: - # - default - # - fleet - # - fleet-privileged - # - upgrade - # - # - label: "Win2022:non-sudo:{{matrix}}" - # command: | - # buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - # .buildkite/scripts/integration-tests.ps1 {{matrix}} false - # artifact_paths: - # - build/** - # - build/diagnostics/** - # agents: - # provider: "gcp" - # machineType: "n1-standard-8" - # image: "family/platform-ingest-elastic-agent-windows-2022" - # matrix: - # - default + - group: "Stateful: Windows" + key: integration-tests-win + depends_on: + - integration-ess + steps: + - label: "Win2022:sudo:{{matrix}}" + command: | + buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + .buildkite/scripts/integration-tests.ps1 {{matrix}} true + artifact_paths: + - build/** + - build/diagnostics/** + agents: + provider: "gcp" + machineType: "n1-standard-8" + image: "family/platform-ingest-elastic-agent-windows-2022" + matrix: + - default + - fleet + - fleet-privileged + - upgrade + + - label: "Win2022:non-sudo:{{matrix}}" + command: | + buildkite-agent artifact download build/distributions/** . 
--step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + .buildkite/scripts/integration-tests.ps1 {{matrix}} false + artifact_paths: + - build/** + - build/diagnostics/** + agents: + provider: "gcp" + machineType: "n1-standard-8" + image: "family/platform-ingest-elastic-agent-windows-2022" + matrix: + - default + + - group: "Stateful:Ubuntu" + key: integration-tests-ubuntu + depends_on: + - integration-ess + steps: + - label: "x86_64:non-sudo: {{matrix}}" + command: | + buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} false + artifact_paths: + - build/** + - build/diagnostics/** + agents: + provider: "gcp" + machineType: "n1-standard-8" + image: "family/platform-ingest-elastic-agent-ubuntu-2404" + matrix: + - default + + - label: "x86_64:sudo: {{matrix}}" + command: | + buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} true + artifact_paths: + - build/** + - build/diagnostics/** + agents: + provider: "gcp" + machineType: "n1-standard-8" + image: "family/platform-ingest-elastic-agent-ubuntu-2404" + matrix: + - default + - container + - fleet-upgrade-to-pr-build + - upgrade + - fleet + - fqdn + - deb + - fleet-airgapped + - fleet-privileged + - fleet-airgapped-privileged - # - group: "Stateful:Ubuntu" - # key: integration-tests-ubuntu - # depends_on: - # - integration-ess - # steps: - # - label: "x86_64:non-sudo: {{matrix}}" - # command: | - # buildkite-agent artifact download build/distributions/** . 
--step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - # .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} false - # artifact_paths: - # - build/** - # - build/diagnostics/** - # agents: - # provider: "gcp" - # machineType: "n1-standard-8" - # image: "family/platform-ingest-elastic-agent-ubuntu-2404" - # matrix: - # - default - # - # - label: "x86_64:sudo: {{matrix}}" - # command: | - # buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - # .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} true - # artifact_paths: - # - build/** - # - build/diagnostics/** - # agents: - # provider: "gcp" - # machineType: "n1-standard-8" - # image: "family/platform-ingest-elastic-agent-ubuntu-2404" - # matrix: - # - default - # - container - # - fleet-upgrade-to-pr-build - # - upgrade - # - fleet - # - fqdn - # - deb - # - fleet-airgapped - # - fleet-privileged - # - fleet-airgapped-privileged - # - # - label: "arm:sudo: {{matrix}}" - # skip: true - # command: | - # buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - # .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} true - # artifact_paths: - # - build/** - # - build/diagnostics/** - # agents: - # provider: "aws" - # imagePrefix: "platform-ingest-beats-ubuntu-2404-aarch64" - # instanceType: "m6g.2xlarge" - # matrix: - # - default - # - container - # - fleet-upgrade-to-pr-build - # - upgrade - # - fleet - # - fqdn - # - deb - # - fleet-airgapped - # - fleet-privileged - # - fleet-airgapped-privileged - # - # - label: "arm:non-sudo: {{matrix}}" - # skip: true - # command: | - # buildkite-agent artifact download build/distributions/** . 
--step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} - # .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} false - # artifact_paths: - # - build/** - # - build/diagnostics/** - # agents: - # provider: "aws" - # imagePrefix: "platform-ingest-beats-ubuntu-2404-aarch64" - # instanceType: "m6g.xlarge" - # matrix: - # - default + - label: "arm:sudo: {{matrix}}" + skip: true + command: | + buildkite-agent artifact download build/distributions/** . --step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} true + artifact_paths: + - build/** + - build/diagnostics/** + agents: + provider: "aws" + imagePrefix: "platform-ingest-beats-ubuntu-2404-aarch64" + instanceType: "m6g.2xlarge" + matrix: + - default + - container + - fleet-upgrade-to-pr-build + - upgrade + - fleet + - fqdn + - deb + - fleet-airgapped + - fleet-privileged + - fleet-airgapped-privileged + + - label: "arm:non-sudo: {{matrix}}" + skip: true + command: | + buildkite-agent artifact download build/distributions/** . 
--step 'package-it' --build ${BUILDKITE_TRIGGERED_FROM_BUILD_ID} + .buildkite/scripts/steps/integration_tests_tf.sh {{matrix}} false + artifact_paths: + - build/** + - build/diagnostics/** + agents: + provider: "aws" + imagePrefix: "platform-ingest-beats-ubuntu-2404-aarch64" + instanceType: "m6g.xlarge" + matrix: + - default - group: "Stateful(Sudo):RHEL8" key: integration-tests-rhel8 diff --git a/.buildkite/scripts/steps/integration_tests.sh b/.buildkite/scripts/steps/integration_tests.sh index d58503f07cb..14be77a903f 100755 --- a/.buildkite/scripts/steps/integration_tests.sh +++ b/.buildkite/scripts/steps/integration_tests.sh @@ -18,7 +18,7 @@ fi # Run integration tests set +e -AGENT_STACK_VERSION="${STACK_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true TEST_PACKAGES="rpm" TEST_GROUPS="rpm" STACK_PROVISIONER="$STACK_PROVISIONER" SNAPSHOT=true mage $MAGE_TARGET $MAGE_SUBTARGET +AGENT_STACK_VERSION="${STACK_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true STACK_PROVISIONER="$STACK_PROVISIONER" SNAPSHOT=true mage $MAGE_TARGET $MAGE_SUBTARGET TESTS_EXIT_STATUS=$? 
set -e From 445a2a6d4062f97ae6b48b10905a9eabef2ffbd2 Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Wed, 18 Dec 2024 22:23:38 -0500 Subject: [PATCH 13/20] enhancement(5832): uncommented integration test pipeline --- .buildkite/integration.pipeline.yml | 38 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.buildkite/integration.pipeline.yml b/.buildkite/integration.pipeline.yml index 14c6f714bfc..a637f499dd2 100644 --- a/.buildkite/integration.pipeline.yml +++ b/.buildkite/integration.pipeline.yml @@ -83,22 +83,22 @@ steps: - github_commit_status: context: "buildkite/elastic-agent-extended-testing - Serverless Beats Tests" - # - label: "Kubernetes Integration tests" - # key: "k8s-integration-tests" - # env: - # K8S_VERSION: "v1.31.0" - # KIND_VERSION: "v0.24.0" - # command: ".buildkite/scripts/steps/k8s-extended-tests.sh" - # artifact_paths: - # - "build/k8s-logs*/*" - # - "build/k8s-logs*/**/*" - # - "build/TEST-**" - # - "build/diagnostics/*" - # agents: - # provider: "gcp" - # machineType: "c2-standard-16" - # image: "family/core-ubuntu-2204" - # diskSizeGb: 400 - # notify: - # - github_commit_status: - # context: "buildkite/elastic-agent-extended-testing - Kubernetes Integration tests" + - label: "Kubernetes Integration tests" + key: "k8s-integration-tests" + env: + K8S_VERSION: "v1.31.0" + KIND_VERSION: "v0.24.0" + command: ".buildkite/scripts/steps/k8s-extended-tests.sh" + artifact_paths: + - "build/k8s-logs*/*" + - "build/k8s-logs*/**/*" + - "build/TEST-**" + - "build/diagnostics/*" + agents: + provider: "gcp" + machineType: "c2-standard-16" + image: "family/core-ubuntu-2204" + diskSizeGb: 400 + notify: + - github_commit_status: + context: "buildkite/elastic-agent-extended-testing - Kubernetes Integration tests" From f7fb263467a06b33ead978cb456e01544e271a4d Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Wed, 18 Dec 2024 22:25:08 -0500 Subject: [PATCH 14/20] enhancement(5832): reverting unnecessary changes --- 
pkg/testing/fixture.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index e9e363be6c8..aa85605b9d8 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -1029,7 +1029,7 @@ func (f *Fixture) DumpProcesses(suffix string) { } f.t.Logf("Dumping running processes in %s", filePath) - file, err := os.OpenFile(filePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0777) + file, err := os.OpenFile(filePath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644) if err != nil { f.t.Logf("failed to dump process; failed to create output file %s root: %s", filePath, err) return From 20972391961b035fe5659f2c9873d677de7aaf72 Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Wed, 18 Dec 2024 22:26:01 -0500 Subject: [PATCH 15/20] enhancement(5832): uncommenting integration tests --- .buildkite/integration.pipeline.yml | 82 ++++++++++++++--------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/.buildkite/integration.pipeline.yml b/.buildkite/integration.pipeline.yml index a637f499dd2..3095024feff 100644 --- a/.buildkite/integration.pipeline.yml +++ b/.buildkite/integration.pipeline.yml @@ -14,48 +14,48 @@ steps: provider: "gcp" machineType: "n1-standard-8" - # - label: "Serverless integration test" - # key: "serverless-integration-tests" - # depends_on: - # - package-it - # concurrency_group: elastic-agent-extended-testing/serverless-integration - # concurrency: 8 - # env: - # # we run each step in a different data center to spread the load - # TEST_INTEG_AUTH_GCP_DATACENTER: "us-central1-a" - # command: | - # buildkite-agent artifact download build/distributions/** . 
--step 'package-it' - # .buildkite/scripts/steps/integration_tests.sh serverless integration:single TestLogIngestionFleetManaged #right now, run a single test in serverless mode as a sort of smoke test, instead of re-running the entire suite - # artifact_paths: - # - "build/TEST-**" - # - "build/diagnostics/*" - # agents: - # provider: "gcp" - # machineType: "n1-standard-8" - # notify: - # - github_commit_status: - # context: "buildkite/elastic-agent-extended-testing - Serverless integration test" + - label: "Serverless integration test" + key: "serverless-integration-tests" + depends_on: + - package-it + concurrency_group: elastic-agent-extended-testing/serverless-integration + concurrency: 8 + env: + # we run each step in a different data center to spread the load + TEST_INTEG_AUTH_GCP_DATACENTER: "us-central1-a" + command: | + buildkite-agent artifact download build/distributions/** . --step 'package-it' + .buildkite/scripts/steps/integration_tests.sh serverless integration:single TestLogIngestionFleetManaged #right now, run a single test in serverless mode as a sort of smoke test, instead of re-running the entire suite + artifact_paths: + - "build/TEST-**" + - "build/diagnostics/*" + agents: + provider: "gcp" + machineType: "n1-standard-8" + notify: + - github_commit_status: + context: "buildkite/elastic-agent-extended-testing - Serverless integration test" - # - label: "Extended runtime leak tests" - # key: "extended-integration-tests" - # depends_on: - # - package-it - # concurrency_group: elastic-agent-extended-testing/leak-tests - # concurrency: 8 - # env: - # TEST_INTEG_AUTH_GCP_DATACENTER: "us-central1-b" - # command: | - # buildkite-agent artifact download build/distributions/** . 
--step 'package-it' - # .buildkite/scripts/steps/integration_tests.sh stateful integration:TestForResourceLeaks - # artifact_paths: - # - "build/TEST-**" - # - "build/diagnostics/*" - # agents: - # provider: "gcp" - # machineType: "n1-standard-8" - # notify: - # - github_commit_status: - # context: "buildkite/elastic-agent-extended-testing - Extended runtime leak tests" + - label: "Extended runtime leak tests" + key: "extended-integration-tests" + depends_on: + - package-it + concurrency_group: elastic-agent-extended-testing/leak-tests + concurrency: 8 + env: + TEST_INTEG_AUTH_GCP_DATACENTER: "us-central1-b" + command: | + buildkite-agent artifact download build/distributions/** . --step 'package-it' + .buildkite/scripts/steps/integration_tests.sh stateful integration:TestForResourceLeaks + artifact_paths: + - "build/TEST-**" + - "build/diagnostics/*" + agents: + provider: "gcp" + machineType: "n1-standard-8" + notify: + - github_commit_status: + context: "buildkite/elastic-agent-extended-testing - Extended runtime leak tests" - label: "Triggering Integration tests" depends_on: From 62e35b684c6b49bff40fd7e0b555dba7ba21886f Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Wed, 18 Dec 2024 23:12:54 -0500 Subject: [PATCH 16/20] enhancement(5832): reverted integration_tests.sh changes --- .buildkite/scripts/steps/integration_tests.sh | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/.buildkite/scripts/steps/integration_tests.sh b/.buildkite/scripts/steps/integration_tests.sh index 14be77a903f..834da1cd4c6 100755 --- a/.buildkite/scripts/steps/integration_tests.sh +++ b/.buildkite/scripts/steps/integration_tests.sh @@ -7,18 +7,19 @@ STACK_PROVISIONER="${1:-"stateful"}" MAGE_TARGET="${2:-"integration:test"}" MAGE_SUBTARGET="${3:-""}" + # Override the stack version from `.package-version` contents # There is a time when the current snapshot is not available on cloud yet, so we cannot use the latest version automatically # This file is managed by 
an automation (mage integration:UpdateAgentPackageVersion) that check if the snapshot is ready. STACK_VERSION="$(cat .package-version)" if [[ -n "$STACK_VERSION" ]]; then - STACK_VERSION=${STACK_VERSION}"-SNAPSHOT" + STACK_VERSION=${STACK_VERSION}"-SNAPSHOT" fi # Run integration tests set +e -AGENT_STACK_VERSION="${STACK_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true STACK_PROVISIONER="$STACK_PROVISIONER" SNAPSHOT=true mage $MAGE_TARGET $MAGE_SUBTARGET +AGENT_STACK_VERSION="${STACK_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true STACK_PROVISIONER="$STACK_PROVISIONER" SNAPSHOT=true mage $MAGE_TARGET $MAGE_SUBTARGET TESTS_EXIT_STATUS=$? set -e @@ -26,10 +27,10 @@ set -e outputXML="build/TEST-go-integration.xml" if [ -f "$outputXML" ]; then - go install github.com/alexec/junit2html@latest - junit2html <"$outputXML" >build/TEST-report.html + go install github.com/alexec/junit2html@latest + junit2html < "$outputXML" > build/TEST-report.html else - echo "Cannot generate HTML test report: $outputXML not found" + echo "Cannot generate HTML test report: $outputXML not found" fi exit $TESTS_EXIT_STATUS From d38e46843b889e842b20b64a26839596b0677f23 Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Wed, 18 Dec 2024 23:15:34 -0500 Subject: [PATCH 17/20] enhancement(5832): reverted fixture_install.go changes --- pkg/testing/fixture.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index aa85605b9d8..89968451747 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -341,6 +341,7 @@ func (f *Fixture) RunBeat(ctx context.Context) error { process.WithContext(ctx), process.WithArgs(args), process.WithCmdOptions(attachOutErr(stdOut, stdErr))) + if err != nil { return fmt.Errorf("failed to spawn %s: %w", f.binaryName, err) } @@ -395,8 +396,7 @@ func RunProcess(t *testing.T, lp Logger, ctx context.Context, runLength time.Duration, logOutput, allowErrs bool, - processPath string, args ...string, -) error { + processPath 
string, args ...string) error { if _, deadlineSet := ctx.Deadline(); !deadlineSet { t.Fatal("Context passed to RunProcess() has no deadline set.") } @@ -414,6 +414,7 @@ func RunProcess(t *testing.T, process.WithContext(ctx), process.WithArgs(args), process.WithCmdOptions(attachOutErr(stdOut, stdErr))) + if err != nil { return fmt.Errorf("failed to spawn %q: %w", processPath, err) } @@ -543,6 +544,7 @@ func (f *Fixture) executeWithClient(ctx context.Context, command string, disable process.WithContext(ctx), process.WithArgs(args), process.WithCmdOptions(attachOutErr(stdOut, stdErr))) + if err != nil { return fmt.Errorf("failed to spawn %s: %w", f.binaryName, err) } From f29f380d539d98c25514647db66963d133c91e24 Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Thu, 19 Dec 2024 00:25:24 -0500 Subject: [PATCH 18/20] enhancement(5832): remove print --- pkg/testing/fixture_install.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/testing/fixture_install.go b/pkg/testing/fixture_install.go index 289e6356d7d..c22b1534458 100644 --- a/pkg/testing/fixture_install.go +++ b/pkg/testing/fixture_install.go @@ -187,7 +187,6 @@ func (f *Fixture) installFunc(ctx context.Context, installOpts *InstallOpts, sho // check for running agents before installing, but only if not installed into a namespace whose point is allowing two agents at once. 
if installOpts != nil && !installOpts.Develop && installOpts.Namespace == "" { - fmt.Println("TESTING IF ANY OTHER AGENTS ARE RUNNING ", getElasticAgentProcesses(f.t)) assert.Empty(f.t, getElasticAgentProcesses(f.t), "there should be no running agent at beginning of Install()") } From 978a940c45226573d3af91c3a2fbd1c4bf5ba8d1 Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Thu, 19 Dec 2024 00:38:43 -0500 Subject: [PATCH 19/20] enhancement(5832): logging output if cleanup fails --- pkg/testing/fixture_install.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/testing/fixture_install.go b/pkg/testing/fixture_install.go index c22b1534458..d983e6fb62f 100644 --- a/pkg/testing/fixture_install.go +++ b/pkg/testing/fixture_install.go @@ -550,6 +550,7 @@ func (f *Fixture) installRpm(ctx context.Context, installOpts *InstallOpts, shou f.t.Logf("removing installed agent files") out, err = exec.CommandContext(uninstallCtx, "sudo", "rm", "-rf", "/var/lib/elastic-agent", "/var/log/elastic-agent", "/etc/elastic-agent").CombinedOutput() if err != nil { + f.t.Log(string(out)) f.t.Logf("failed to 'sudo rm -rf /var/lib/elastic-agent /var/log/elastic-agent/ /etc/elastic-agent'") f.t.FailNow() } From fc6ed16d1799f3fe755167e5c3845276c3adde21 Mon Sep 17 00:00:00 2001 From: kaanyalti Date: Thu, 19 Dec 2024 21:40:41 -0500 Subject: [PATCH 20/20] enhancement(5832): updated k8s test, refactored upgrade test into test step --- .../kubernetes_agent_standalone_test.go | 231 ++---------------- 1 file changed, 22 insertions(+), 209 deletions(-) diff --git a/testing/integration/kubernetes_agent_standalone_test.go b/testing/integration/kubernetes_agent_standalone_test.go index 149265d334a..24b91ab0d38 100644 --- a/testing/integration/kubernetes_agent_standalone_test.go +++ b/testing/integration/kubernetes_agent_standalone_test.go @@ -289,6 +289,7 @@ func TestKubernetesAgentHelm(t *testing.T) { k8sStepCheckAgentStatus("name=agent-pernode-helm-agent", schedulableNodeCount, "agent", nil), 
k8sStepCheckAgentStatus("name=agent-clusterwide-helm-agent", 1, "agent", nil), k8sStepCheckAgentStatus("name=agent-ksmsharded-helm-agent", 1, "agent", nil), + k8sStepCheckRestrictUpgrade("name=agent-pernode-helm-agent", schedulableNodeCount, "agent"), k8sStepRunInnerTests("name=agent-pernode-helm-agent", schedulableNodeCount, "agent"), k8sStepRunInnerTests("name=agent-clusterwide-helm-agent", 1, "agent"), k8sStepRunInnerTests("name=agent-ksmsharded-helm-agent", 1, "agent"), @@ -321,6 +322,7 @@ func TestKubernetesAgentHelm(t *testing.T) { k8sStepCheckAgentStatus("name=agent-pernode-helm-agent", schedulableNodeCount, "agent", nil), k8sStepCheckAgentStatus("name=agent-clusterwide-helm-agent", 1, "agent", nil), k8sStepCheckAgentStatus("name=agent-ksmsharded-helm-agent", 1, "agent", nil), + k8sStepCheckRestrictUpgrade("name=agent-pernode-helm-agent", schedulableNodeCount, "agent"), k8sStepRunInnerTests("name=agent-pernode-helm-agent", schedulableNodeCount, "agent"), k8sStepRunInnerTests("name=agent-clusterwide-helm-agent", 1, "agent"), k8sStepRunInnerTests("name=agent-ksmsharded-helm-agent", 1, "agent"), @@ -480,215 +482,6 @@ func TestKubernetesAgentHelm(t *testing.T) { } } -func TestRestrictCliUpgrade(t *testing.T) { - info := define.Require(t, define.Requirements{ - Stack: &define.Stack{}, - Local: false, - Sudo: false, - OS: []define.OS{ - {Type: define.Kubernetes, DockerVariant: "basic"}, - {Type: define.Kubernetes, DockerVariant: "wolfi"}, - {Type: define.Kubernetes, DockerVariant: "ubi"}, - {Type: define.Kubernetes, DockerVariant: "complete"}, - {Type: define.Kubernetes, DockerVariant: "complete-wolfi"}, - }, - Group: define.Kubernetes, - }) - - agentImage := os.Getenv("AGENT_IMAGE") - require.NotEmpty(t, agentImage, "AGENT_IMAGE must be set") - - agentImageParts := strings.SplitN(agentImage, ":", 2) - require.Len(t, agentImageParts, 2, "AGENT_IMAGE must be in the form ':'") - agentImageRepo := agentImageParts[0] - agentImageTag := agentImageParts[1] - - client, 
err := info.KubeClient() - require.NoError(t, err) - require.NotNil(t, client) - - testLogsBasePath := os.Getenv("K8S_TESTS_POD_LOGS_BASE") - require.NotEmpty(t, testLogsBasePath, "K8S_TESTS_POD_LOGS_BASE must be set") - - err = os.MkdirAll(filepath.Join(testLogsBasePath, t.Name()), 0755) - require.NoError(t, err, "failed to create test logs directory") - - namespace := info.Namespace - - esHost := os.Getenv("ELASTICSEARCH_HOST") - require.NotEmpty(t, esHost, "ELASTICSEARCH_HOST must be set") - - esAPIKey, err := generateESAPIKey(info.ESClient, namespace) - require.NoError(t, err, "failed to generate ES API key") - require.NotEmpty(t, esAPIKey, "failed to generate ES API key") - - require.NoError(t, err, "failed to create fleet enroll params") - - testCases := []struct { - name string - values map[string]any - atLeastValidatedPodsNumber int - runK8SInnerTests bool - }{ - { - name: "helm standalone agent default kubernetes privileged", - values: map[string]any{ - "kubernetes": map[string]any{ - "enabled": true, - }, - "agent": map[string]any{ - "unprivileged": false, - "image": map[string]any{ - "repository": agentImageRepo, - "tag": agentImageTag, - "pullPolicy": "Never", - }, - }, - "outputs": map[string]any{ - "default": map[string]any{ - "type": "ESPlainAuthAPI", - "url": esHost, - "api_key": esAPIKey, - }, - }, - }, - runK8SInnerTests: true, - // - perNode Daemonset (at least 1 agent pod) - // - clusterWide Deployment (1 agent pod) - // - ksmSharded Statefulset (1 agent pod) - atLeastValidatedPodsNumber: 3, - }, - { - name: "helm standalone agent default kubernetes unprivileged", - values: map[string]any{ - "kubernetes": map[string]any{ - "enabled": true, - }, - "agent": map[string]any{ - "unprivileged": true, - "image": map[string]any{ - "repository": agentImageRepo, - "tag": agentImageTag, - "pullPolicy": "Never", - }, - }, - "outputs": map[string]any{ - "default": map[string]any{ - "type": "ESPlainAuthAPI", - "url": esHost, - "api_key": esAPIKey, - }, - }, - 
}, - runK8SInnerTests: true, - // - perNode Daemonset (at least 1 agent pod) - // - clusterWide Deployment (1 agent pod) - // - ksmSharded Statefulset (1 agent pod) - atLeastValidatedPodsNumber: 3, - }, - } - - for _, tc := range testCases { - t.Run(tc.name, func(t *testing.T) { - ctx := context.Background() - hasher := sha256.New() - hasher.Write([]byte(tc.name)) - testNamespace := strings.ToLower(base64.URLEncoding.EncodeToString(hasher.Sum(nil))) - testNamespace = noSpecialCharsRegexp.ReplaceAllString(testNamespace, "") - - settings := cli.New() - settings.SetNamespace(testNamespace) - actionConfig := &action.Configuration{} - - helmChart, err := loader.Load(agentK8SHelm) - require.NoError(t, err, "failed to load helm chart") - - err = actionConfig.Init(settings.RESTClientGetter(), settings.Namespace(), "", - func(format string, v ...interface{}) {}) - require.NoError(t, err, "failed to init helm action config") - - helmValues := tc.values - - t.Cleanup(func() { - if t.Failed() { - k8sDumpAllPodLogs(ctx, client, testNamespace, testNamespace, testLogsBasePath) - } - - uninstallAction := action.NewUninstall(actionConfig) - uninstallAction.Wait = true - - _, err = uninstallAction.Run("helm-agent") - if err != nil { - require.NoError(t, err, "failed to uninstall helm chart") - } - }) - - installAction := action.NewInstall(actionConfig) - installAction.Namespace = testNamespace - installAction.CreateNamespace = true - installAction.UseReleaseName = true - installAction.ReleaseName = "helm-agent" - installAction.Timeout = 2 * time.Minute - installAction.Wait = true - installAction.WaitForJobs = true - _, err = installAction.Run(helmChart, helmValues) - require.NoError(t, err, "failed to install helm chart") - - podList := &corev1.PodList{} - err = client.Resources(testNamespace).List(ctx, podList) - require.NoError(t, err, fmt.Sprintf("failed to list pods in namespace %s", testNamespace)) - - checkedAgentContainers := 0 - - for _, pod := range podList.Items { - if 
!strings.HasPrefix(pod.GetName(), "agent-") { - continue - } - - command := []string{"elastic-agent", "status"} - var stdout, stderr bytes.Buffer - var agentHealthyErr error - // we will wait maximum 120 seconds for the agent to report healthy - for i := 0; i < 120; i++ { - stdout.Reset() - stderr.Reset() - agentHealthyErr = client.Resources().ExecInPod(ctx, testNamespace, pod.Name, "agent", command, &stdout, &stderr) - if agentHealthyErr == nil { - break - } - time.Sleep(time.Second * 1) - } - - statusString := stdout.String() - if agentHealthyErr != nil { - t.Errorf("elastic-agent never reported healthy: %v", agentHealthyErr) - t.Logf("stdout: %s\n", statusString) - t.Logf("stderr: %s\n", stderr.String()) - t.FailNow() - return - } - - stdout.Reset() - stderr.Reset() - - upgradeCmd := []string{"elastic-agent", "upgrade", "1.0.0"} - upgradeCmdError := client.Resources().ExecInPod(ctx, testNamespace, pod.Name, "agent", upgradeCmd, &stdout, &stderr) - - errOut := stderr.String() - require.Error(t, upgradeCmdError) - require.Contains(t, errOut, coordinator.ErrNotUpgradable.Error()) - - stderr.Reset() - - checkedAgentContainers++ - } - - require.GreaterOrEqual(t, checkedAgentContainers, tc.atLeastValidatedPodsNumber, - fmt.Sprintf("at least %d agent containers should be checked", tc.atLeastValidatedPodsNumber)) - }) - } -} - // k8sCheckAgentStatus checks that the agent reports healthy. 
func k8sCheckAgentStatus(ctx context.Context, client klient.Client, stdout *bytes.Buffer, stderr *bytes.Buffer, namespace string, agentPodName string, containerName string, componentPresence map[string]bool,
@@ -1545,3 +1338,33 @@ func k8sStepHintsRedisDelete() k8sTestStep {
 		require.NoError(t, err, "failed to delete redis k8s objects")
 	}
 }
+
+// k8sStepCheckRestrictUpgrade returns a test step asserting that the CLI upgrade
+// command is refused by the agents running in the selected pods.
+//
+// The step lists the pods in namespace that match agentPodLabelSelector and
+// requires exactly expectedPodNumber of them. It then runs
+// "elastic-agent upgrade 1.0.0" in containerName of every pod; each run must
+// fail and its stderr must contain coordinator.ErrNotUpgradable's message.
+func k8sStepCheckRestrictUpgrade(agentPodLabelSelector string, expectedPodNumber int, containerName string) k8sTestStep {
+	return func(t *testing.T, ctx context.Context, kCtx k8sContext, namespace string) {
+		podList := &corev1.PodList{}
+		err := kCtx.client.Resources(namespace).List(ctx, podList, func(opt *metav1.ListOptions) {
+			opt.LabelSelector = agentPodLabelSelector
+		})
+		// testify treats the first message argument as a format string, so the
+		// selector is passed as a format argument instead of appending the pod list.
+		require.NoErrorf(t, err, "failed to list pods with selector %q", agentPodLabelSelector)
+		require.NotEmptyf(t, podList.Items, "no pods found with selector %q", agentPodLabelSelector)
+		require.Equalf(t, expectedPodNumber, len(podList.Items), "unexpected number of pods found with selector %q", agentPodLabelSelector)
+
+		command := []string{"elastic-agent", "upgrade", "1.0.0"}
+		for _, pod := range podList.Items {
+			var stdout, stderr bytes.Buffer
+
+			execErr := kCtx.client.Resources().ExecInPod(ctx, namespace, pod.Name, containerName, command, &stdout, &stderr)
+			require.Errorf(t, execErr, "upgrade command unexpectedly succeeded in pod %s; stdout: %s", pod.Name, stdout.String())
+			require.Containsf(t, stderr.String(), coordinator.ErrNotUpgradable.Error(), "unexpected upgrade error output in pod %s", pod.Name)
+		}
+	}
+}