From 045c84e02662546f6d0967dde29a6bda03b48061 Mon Sep 17 00:00:00 2001 From: "mergify[bot]" <37929162+mergify[bot]@users.noreply.github.com> Date: Thu, 5 Oct 2023 16:42:26 +0000 Subject: [PATCH] [8.10](backport #3477) Refactor and fix all upgrade integration tests (#3495) * Refactor and fix all upgrade integration tests (#3477) * Fix all upgrade tests. * Fix imports and headers. * Update notice. * Exclude testing/** from sonar. * Fix comments from code review. * Add extra error information in the artifact fetcher. * Fixes from code review. * Add upgrade uninstall kill watcher test. * Remove go replace. Regenerate notice. Fix lint. * Import WithSourceURI logic. Fix fleet test to not skip if the versions are different and the commits are the same. * More test fixes. * Fix imports. * Re-add TestStandaloneUpgradeFailsWhenUpgradeIsInProgress. Fix code review. (cherry picked from commit 6201e19a5b8ebfb49fb25aca0941ee269cfbf42b) # Conflicts: # sonar-project.properties # testing/integration/upgrade_test.go * Fix merge. 
--------- Co-authored-by: Blake Rouse --- NOTICE.txt | 4 +- go.mod | 2 +- go.sum | 4 +- magefile.go | 6 +- pkg/testing/fetcher_artifact.go | 40 +- pkg/testing/fixture.go | 50 +- pkg/testing/fixture_install.go | 3 +- pkg/testing/tools/agents.go | 3 +- pkg/utils/watcher.go | 45 + sonar-project.properties | 2 +- .../upgrade_broken_package_test.go | 92 ++ testing/integration/upgrade_downgrade_test.go | 87 ++ testing/integration/upgrade_fleet_test.go | 175 +++ testing/integration/upgrade_gpg_test.go | 132 ++ testing/integration/upgrade_rollback_test.go | 196 +++ .../upgrade_standalone_inprogress.go | 98 ++ .../upgrade_standalone_retry_test.go | 96 ++ .../integration/upgrade_standalone_test.go | 51 + testing/integration/upgrade_test.go | 1095 ----------------- testing/integration/upgrade_uninstall_test.go | 90 ++ testing/upgradetest/logger.go | 9 + testing/upgradetest/upgrader.go | 367 ++++++ testing/upgradetest/versions.go | 114 ++ testing/upgradetest/watcher.go | 110 ++ 24 files changed, 1756 insertions(+), 1115 deletions(-) create mode 100644 pkg/utils/watcher.go create mode 100644 testing/integration/upgrade_broken_package_test.go create mode 100644 testing/integration/upgrade_downgrade_test.go create mode 100644 testing/integration/upgrade_fleet_test.go create mode 100644 testing/integration/upgrade_gpg_test.go create mode 100644 testing/integration/upgrade_rollback_test.go create mode 100644 testing/integration/upgrade_standalone_inprogress.go create mode 100644 testing/integration/upgrade_standalone_retry_test.go create mode 100644 testing/integration/upgrade_standalone_test.go delete mode 100644 testing/integration/upgrade_test.go create mode 100644 testing/integration/upgrade_uninstall_test.go create mode 100644 testing/upgradetest/logger.go create mode 100644 testing/upgradetest/upgrader.go create mode 100644 testing/upgradetest/versions.go create mode 100644 testing/upgradetest/watcher.go diff --git a/NOTICE.txt b/NOTICE.txt index 4a48afc5a6c..f98d6c9dfb7 
100644 --- a/NOTICE.txt +++ b/NOTICE.txt @@ -1367,11 +1367,11 @@ SOFTWARE -------------------------------------------------------------------------------- Dependency : github.com/elastic/elastic-agent-libs -Version: v0.3.13 +Version: v0.5.0 Licence type (autodetected): Apache-2.0 -------------------------------------------------------------------------------- -Contents of probable licence file $GOMODCACHE/github.com/elastic/elastic-agent-libs@v0.3.13/LICENSE: +Contents of probable licence file $GOMODCACHE/github.com/elastic/elastic-agent-libs@v0.5.0/LICENSE: Apache License Version 2.0, January 2004 diff --git a/go.mod b/go.mod index 172fcc5c204..f1a94183543 100644 --- a/go.mod +++ b/go.mod @@ -15,7 +15,7 @@ require ( github.com/elastic/e2e-testing v1.1.0 github.com/elastic/elastic-agent-autodiscover v0.6.2 github.com/elastic/elastic-agent-client/v7 v7.3.0 - github.com/elastic/elastic-agent-libs v0.3.13 + github.com/elastic/elastic-agent-libs v0.5.0 github.com/elastic/elastic-agent-system-metrics v0.6.1 github.com/elastic/elastic-transport-go/v8 v8.3.0 github.com/elastic/go-elasticsearch/v8 v8.8.2 diff --git a/go.sum b/go.sum index 13586635f18..d24b797837f 100644 --- a/go.sum +++ b/go.sum @@ -779,8 +779,8 @@ github.com/elastic/elastic-agent-autodiscover v0.6.2 h1:7P3cbMBWXjbzA80rxitQjc+P github.com/elastic/elastic-agent-autodiscover v0.6.2/go.mod h1:yXYKFAG+Py+TcE4CCR8EAbJiYb+6Dz9sCDoWgOveqtU= github.com/elastic/elastic-agent-client/v7 v7.3.0 h1:LugKtBXK7bp4SFL/uQqGU/f4Ppx12Jk5a36voGabLa0= github.com/elastic/elastic-agent-client/v7 v7.3.0/go.mod h1:9/amG2K2y2oqx39zURcc+hnqcX+nyJ1cZrLgzsgo5c0= -github.com/elastic/elastic-agent-libs v0.3.13 h1:qFiBWeBfjsBId+i31rggyW2ZjzA9qBRz7wIiy+rkcvc= -github.com/elastic/elastic-agent-libs v0.3.13/go.mod h1:mpSfrigixx8x+uMxWKl4LtdlrKIhZbA4yT2eIeIazUQ= +github.com/elastic/elastic-agent-libs v0.5.0 h1:8LbxSuMiGy8xhHX5NrE/dmTLsLMEuA+2AODUsiBfEcE= +github.com/elastic/elastic-agent-libs v0.5.0/go.mod 
h1:mpSfrigixx8x+uMxWKl4LtdlrKIhZbA4yT2eIeIazUQ= github.com/elastic/elastic-agent-system-metrics v0.6.1 h1:LCN1lvQTkdUuU/rKlpKyVMDU/G/I8/iZWCaW6K+mo4o= github.com/elastic/elastic-agent-system-metrics v0.6.1/go.mod h1:Bj8XM/uNKm553blQHkGNEICRLGnVEtw8yttmV5vBngA= github.com/elastic/elastic-integration-corpus-generator-tool v0.5.0/go.mod h1:uf9N86y+UACGybdEhZLpwZ93XHWVhsYZAA4c2T2v6YM= diff --git a/magefile.go b/magefile.go index 3d0e6de1e9e..06016bd48ff 100644 --- a/magefile.go +++ b/magefile.go @@ -1495,7 +1495,11 @@ func (Integration) Local(ctx context.Context, testName string) error { params.Tags = append(params.Tags, "local") params.Packages = []string{"github.com/elastic/elastic-agent/testing/integration"} - goTestFlags := strings.SplitN(os.Getenv("GOTEST_FLAGS"), " ", -1) + var goTestFlags []string + rawTestFlags := os.Getenv("GOTEST_FLAGS") + if rawTestFlags != "" { + goTestFlags = strings.Split(rawTestFlags, " ") + } params.ExtraFlags = goTestFlags if testName == "all" { diff --git a/pkg/testing/fetcher_artifact.go b/pkg/testing/fetcher_artifact.go index 026a71a1bf3..d3ec48c249e 100644 --- a/pkg/testing/fetcher_artifact.go +++ b/pkg/testing/fetcher_artifact.go @@ -63,15 +63,19 @@ func (f *artifactFetcher) Fetch(ctx context.Context, operatingSystem string, arc } var uri string + var prevErr error if !f.snapshotOnly { - uri, _ = findURI(ctx, f.doer, version) + uri, prevErr = findURI(ctx, f.doer, version) } preVersion := version + version, _ = splitBuildID(version) if uri == "" { - version = fmt.Sprintf("%s-SNAPSHOT", version) + if !strings.HasSuffix(version, "-SNAPSHOT") { + version += "-SNAPSHOT" + } uri, err = findURI(ctx, f.doer, version) if err != nil { - return nil, fmt.Errorf("failed to find snapshot URI for version %s: %w", preVersion, err) + return nil, fmt.Errorf("failed to find snapshot URI for version %s: %w (previous error: %w)", preVersion, err, prevErr) } } @@ -112,6 +116,7 @@ func (r *artifactResult) Fetch(ctx context.Context, l Logger, dir string) 
error } func findURI(ctx context.Context, doer httpDoer, version string) (string, error) { + version, buildID := splitBuildID(version) artifactsURI := fmt.Sprintf("https://artifacts-api.elastic.co/v1/search/%s/elastic-agent", version) req, err := http.NewRequestWithContext(ctx, "GET", artifactsURI, nil) if err != nil { @@ -165,11 +170,36 @@ func findURI(ctx context.Context, doer httpDoer, version string) (string, error) // https://snapshots.elastic.co/8.7.0-d050210c/downloads/elastic-agent-shipper/elastic-agent-shipper-8.7.0-SNAPSHOT-linux-x86_64.tar.gz index := strings.Index(uri, "/beats/elastic-agent/") if index != -1 { - return fmt.Sprintf("%s/beats/elastic-agent/", uri[:index]), nil + if buildID == "" { + // no build id, first is selected + return fmt.Sprintf("%s/beats/elastic-agent/", uri[:index]), nil + } + if strings.Contains(uri, fmt.Sprintf("%s-%s", stripSnapshot(version), buildID)) { + return fmt.Sprintf("%s/beats/elastic-agent/", uri[:index]), nil + } } } - return "", fmt.Errorf("uri not detected") + if buildID == "" { + return "", fmt.Errorf("uri not detected") + } + return "", fmt.Errorf("uri not detected with specific buildid %s", buildID) +} + +func splitBuildID(version string) (string, string) { + split := strings.SplitN(version, "+", 2) + if len(split) == 1 { + // no build ID + return split[0], "" + } + return split[0], split[1] +} + +func stripSnapshot(version string) string { + if strings.HasSuffix(version, "-SNAPSHOT") { + return strings.TrimSuffix(version, "-SNAPSHOT") + } + return version } func DownloadPackage(ctx context.Context, l Logger, doer httpDoer, downloadPath string, packageFile string) error { diff --git a/pkg/testing/fixture.go b/pkg/testing/fixture.go index b1439f7162c..24384168120 100644 --- a/pkg/testing/fixture.go +++ b/pkg/testing/fixture.go @@ -183,7 +183,7 @@ func (f *Fixture) Prepare(ctx context.Context, components ...UsableComponent) er // configuration. 
This must be called after `Prepare` is called but before `Run` // or `Install` can be called. func (f *Fixture) Configure(ctx context.Context, yamlConfig []byte) error { - err := f.ensurePrepared(ctx) + err := f.EnsurePrepared(ctx) if err != nil { return err } @@ -205,7 +205,7 @@ func (f *Fixture) WorkDir() string { // SrcPackage returns the location on disk of the elastic agent package used by this fixture. func (f *Fixture) SrcPackage(ctx context.Context) (string, error) { - err := f.ensurePrepared(ctx) + err := f.EnsurePrepared(ctx) if err != nil { return "", err } @@ -248,7 +248,7 @@ func (f *Fixture) Run(ctx context.Context, states ...State) error { } var err error - err = f.ensurePrepared(ctx) + err = f.EnsurePrepared(ctx) if err != nil { return err } @@ -350,7 +350,7 @@ func (f *Fixture) Run(ctx context.Context, states ...State) error { // Exec provides a way of performing subcommand on the prepared Elastic Agent binary. func (f *Fixture) Exec(ctx context.Context, args []string, opts ...process.CmdOption) ([]byte, error) { - err := f.ensurePrepared(ctx) + err := f.EnsurePrepared(ctx) if err != nil { return nil, fmt.Errorf("failed to prepare before exec: %w", err) } @@ -369,7 +369,7 @@ func (f *Fixture) Exec(ctx context.Context, args []string, opts ...process.CmdOp // PrepareAgentCommand creates an exec.Cmd ready to execute an elastic-agent command. func (f *Fixture) PrepareAgentCommand(ctx context.Context, args []string, opts ...process.CmdOption) (*exec.Cmd, error) { - err := f.ensurePrepared(ctx) + err := f.EnsurePrepared(ctx) if err != nil { return nil, fmt.Errorf("failed to prepare before exec: %w", err) } @@ -456,6 +456,23 @@ func (f *Fixture) ExecInspect(ctx context.Context, opts ...process.CmdOption) (A return inspect, err } +// ExecVersion executes the version subcommand on the prepared Elastic Agent binary +// with '--binary-only'. It returns the parsed YAML output. 
+func (f *Fixture) ExecVersion(ctx context.Context, opts ...process.CmdOption) (AgentVersionOutput, error) { + out, err := f.Exec(ctx, []string{"version", "--binary-only", "--yaml"}, opts...) + version := AgentVersionOutput{} + if uerr := yaml.Unmarshal(out, &version); uerr != nil { + return AgentVersionOutput{}, + fmt.Errorf("could not unmarshal agent version output: %w", + errors.Join(&ExecErr{ + err: err, + Output: out, + }, uerr)) + } + + return version, err +} + // IsHealthy returns if the prepared Elastic Agent reports itself as healthy. // It returns false, err if it cannot determine the state of the agent. // It should work with any 8.6+ agent @@ -468,7 +485,8 @@ func (f *Fixture) IsHealthy(ctx context.Context, opts ...process.CmdOption) (boo return status.State == int(cproto.State_HEALTHY), nil } -func (f *Fixture) ensurePrepared(ctx context.Context) error { +// EnsurePrepared ensures that the fixture has been prepared. +func (f *Fixture) EnsurePrepared(ctx context.Context) error { if f.workDir == "" { return f.Prepare(ctx) } @@ -916,3 +934,23 @@ type AgentInspectOutput struct { Data string `yaml:"data"` } `yaml:"signed"` } + +type AgentBinaryVersion struct { + Version string `yaml:"version"` + Commit string `yaml:"commit"` + BuildTime string `yaml:"build_time"` + Snapshot bool `yaml:"snapshot"` +} + +// String returns the version string. +func (v *AgentBinaryVersion) String() string { + s := v.Version + if v.Snapshot { + s += "-SNAPSHOT" + } + return s +} + +type AgentVersionOutput struct { + Binary AgentBinaryVersion `yaml:"binary"` +} diff --git a/pkg/testing/fixture_install.go b/pkg/testing/fixture_install.go index 469ffdde74f..f7efa062154 100644 --- a/pkg/testing/fixture_install.go +++ b/pkg/testing/fixture_install.go @@ -143,7 +143,8 @@ func (f *Fixture) Install(ctx context.Context, installOpts *InstallOpts, opts .. 
"keeping the agent installed will jeopardise other tests") } - out, err := f.Uninstall(ctx, &UninstallOpts{Force: true, UninstallToken: f.uninstallToken}) + // don't use current `ctx` as it could be cancelled + out, err := f.Uninstall(context.Background(), &UninstallOpts{Force: true, UninstallToken: f.uninstallToken}) f.setClient(nil) if err != nil && (errors.Is(err, ErrNotInstalled) || diff --git a/pkg/testing/tools/agents.go b/pkg/testing/tools/agents.go index f7ca5c33270..e440a927846 100644 --- a/pkg/testing/tools/agents.go +++ b/pkg/testing/tools/agents.go @@ -105,7 +105,7 @@ func GetAgentIDByHostname(client *kibana.Client, policyID, hostname string) (str return agent.Agent.ID, nil } -func UpgradeAgent(client *kibana.Client, policyID, version string) error { +func UpgradeAgent(client *kibana.Client, policyID, version string, force bool) error { hostname, err := os.Hostname() if err != nil { return err @@ -118,6 +118,7 @@ func UpgradeAgent(client *kibana.Client, policyID, version string) error { upgradeAgentReq := kibana.UpgradeAgentRequest{ ID: agentID, Version: version, + Force: force, } _, err = client.UpgradeAgent(context.Background(), upgradeAgentReq) if err != nil { diff --git a/pkg/utils/watcher.go b/pkg/utils/watcher.go new file mode 100644 index 00000000000..fa018b704b3 --- /dev/null +++ b/pkg/utils/watcher.go @@ -0,0 +1,45 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package utils + +import ( + "fmt" + "path/filepath" + "strings" + + "github.com/elastic/elastic-agent-system-metrics/metric/system/process" +) + +// GetWatcherPIDs returns the PID's of any running `elastic-agent watch` process. 
+func GetWatcherPIDs() ([]int, error) { + procStats := process.Stats{ + // filtering with '.*elastic-agent' or '^.*elastic-agent$' doesn't + // seem to work as expected, filtering is done in the for loop below + Procs: []string{".*"}, + } + err := procStats.Init() + if err != nil { + return nil, fmt.Errorf("failed to initialize process.Stats: %w", err) + } + pidMap, _, err := procStats.FetchPids() + if err != nil { + return nil, fmt.Errorf("failed to fetch pids: %w", err) + } + var pids []int + for pid, state := range pidMap { + if len(state.Args) < 2 { + // must have at least 2 args "elastic-agent[.exe] watch" + continue + } + // instead of matching on Windows using the specific '.exe' suffix, this ensures + // that even if the watcher is spawned without the '.exe' suffix (which Windows will allow and supports) + // it always results in the watch process being killed + if strings.TrimSuffix(filepath.Base(state.Args[0]), ".exe") == "elastic-agent" && state.Args[1] == "watch" { + // it is a watch subprocess + pids = append(pids, pid) + } + } + return pids, nil +} diff --git a/sonar-project.properties b/sonar-project.properties index da643cd741e..b1334c80e22 100644 --- a/sonar-project.properties +++ b/sonar-project.properties @@ -3,7 +3,7 @@ sonar.projectKey=elastic_elastic-agent sonar.host.url=https://sonarcloud.io sonar.sources=. -sonar.exclusions=**/*_test.go, .git/**, dev-tools/**, /magefile.go, changelog/**, _meta/**, deploy/**, docs/**, img/**, specs/**, pkg/testing/** +sonar.exclusions=**/*_test.go, .git/**, dev-tools/**, /magefile.go, changelog/**, _meta/**, deploy/**, docs/**, img/**, specs/**, pkg/testing/**, pkg/component/fake/**, testing/**, **/mocks/*.go sonar.tests=. 
sonar.test.inclusions=**/*_test.go diff --git a/testing/integration/upgrade_broken_package_test.go b/testing/integration/upgrade_broken_package_test.go new file mode 100644 index 00000000000..a9f090d132b --- /dev/null +++ b/testing/integration/upgrade_broken_package_test.go @@ -0,0 +1,92 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build integration + +package integration + +import ( + "context" + "fmt" + "io/fs" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/testing/upgradetest" + agtversion "github.com/elastic/elastic-agent/version" +) + +func TestUpgradeBrokenPackageVersion(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Start at the build version as we want to test the retry + // logic that is in the build. + startFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + + // Upgrade to an old build. + upgradeToVersion, err := upgradetest.PreviousMinor(ctx, define.Version()) + require.NoError(t, err) + endFixture, err := atesting.NewFixture( + t, + upgradeToVersion, + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err) + + // Pre-upgrade remove the package version files. 
+ preUpgradeHook := func() error { + // get rid of the package version files in the installed directory + return removePackageVersionFiles(t, startFixture) + } + + t.Logf("Testing Elastic Agent upgrade from %s to %s...", define.Version(), upgradeToVersion) + + err = upgradetest.PerformUpgrade(ctx, startFixture, endFixture, t, upgradetest.WithPreUpgradeHook(preUpgradeHook)) + assert.NoError(t, err) +} + +func removePackageVersionFiles(t *testing.T, f *atesting.Fixture) error { + installFS := os.DirFS(f.WorkDir()) + matches := []string{} + + err := fs.WalkDir(installFS, ".", func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + if d.Name() == agtversion.PackageVersionFileName { + matches = append(matches, path) + } + return nil + }) + if err != nil { + return err + } + + t.Logf("package version files found: %v", matches) + + // the version files should have been removed from the other test, we just make sure + for _, m := range matches { + vFile := filepath.Join(f.WorkDir(), m) + t.Logf("removing package version file %q", vFile) + err = os.Remove(vFile) + if err != nil { + return fmt.Errorf("error removing package version file %q: %w", vFile, err) + } + } + return nil +} diff --git a/testing/integration/upgrade_downgrade_test.go b/testing/integration/upgrade_downgrade_test.go new file mode 100644 index 00000000000..0fa42552340 --- /dev/null +++ b/testing/integration/upgrade_downgrade_test.go @@ -0,0 +1,87 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +//go:build integration + +package integration + +import ( + "context" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/version" + "github.com/elastic/elastic-agent/testing/upgradetest" +) + +func TestStandaloneDowngradeToSpecificSnapshotBuild(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + // support for upgrading to a specific snapshot build was not + // added until 8.9. + minVersion := upgradetest.Version_8_9_0_SNAPSHOT + pv, err := version.ParseVersion(define.Version()) + if pv.Less(*minVersion) { + t.Skipf("Version %s is lower than min version %s", define.Version(), minVersion) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // retrieve all the versions of agent from the artifact API + aac := tools.NewArtifactAPIClient() + latestSnapshotVersion, err := tools.GetLatestSnapshotVersion(ctx, t, aac) + require.NoError(t, err) + + // get all the builds of the snapshot version (need to pass x.y.z-SNAPSHOT format) + // 2 builds are required to be available for the test to work. This is because + // if we upgrade to the latest build there would be no difference then passing the version + // string without the buildid, being agent would pick that build anyway. + // We pick the build that is 2 builds back to upgrade to, to ensure that the buildid is + // working correctly and agent is not only picking the latest build. + builds, err := aac.GetBuildsForVersion(ctx, latestSnapshotVersion.VersionWithPrerelease()) + require.NoError(t, err) + if len(builds.Builds) < 2 { + t.Skipf("not enough SNAPSHOT builds exist for version %s. 
Found %d", latestSnapshotVersion.VersionWithPrerelease(), len(builds.Builds)) + } + + // find the specific build to use for the test + upgradeVersionString := builds.Builds[1] + buildFragments := strings.Split(upgradeVersionString, "-") + require.Lenf(t, buildFragments, 2, "version %q returned by artifact api is not in format -", upgradeVersionString) + endParsedVersion := version.NewParsedSemVer( + latestSnapshotVersion.Major(), + latestSnapshotVersion.Minor(), + latestSnapshotVersion.Patch(), + latestSnapshotVersion.Prerelease(), + buildFragments[1], + ) + + // Start at the build version as we want to test the retry + // logic that is in the build. + startFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + + // Upgrade to the specific build. + endFixture, err := atesting.NewFixture( + t, + endParsedVersion.String(), + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err) + + t.Logf("Testing Elastic Agent upgrade from %s to %s...", define.Version(), endParsedVersion.String()) + + err = upgradetest.PerformUpgrade(ctx, startFixture, endFixture, t) + assert.NoError(t, err) +} diff --git a/testing/integration/upgrade_fleet_test.go b/testing/integration/upgrade_fleet_test.go new file mode 100644 index 00000000000..b638f4570d5 --- /dev/null +++ b/testing/integration/upgrade_fleet_test.go @@ -0,0 +1,175 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +//go:build integration + +package integration + +import ( + "context" + "strings" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent-libs/kibana" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/version" + "github.com/elastic/elastic-agent/testing/upgradetest" +) + +// TestFleetManagedUpgrade tests that the build under test can retrieve an action from +// Fleet and perform the upgrade. It does not need to test all the combinations of +// versions as the standalone tests already perform those tests and would be redundant. +func TestFleetManagedUpgrade(t *testing.T) { + info := define.Require(t, define.Requirements{ + Stack: &define.Stack{}, + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + // Start at the build version as we want to test the retry + // logic that is in the build. + startFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + err = startFixture.Prepare(ctx) + require.NoError(t, err) + startVersionInfo, err := startFixture.ExecVersion(ctx) + require.NoError(t, err) + + // Upgrade to a different build but of the same version (always a snapshot). + // In the case there is not a different build then the test is skipped. + // Fleet doesn't allow a downgrade to occur, so we cannot go to a lower version. 
+ sameVersion := define.Version() + if !strings.HasSuffix(sameVersion, "-SNAPSHOT") { + sameVersion += "-SNAPSHOT" + } + endFixture, err := atesting.NewFixture( + t, + sameVersion, + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err) + err = endFixture.Prepare(ctx) + require.NoError(t, err) + endVersionInfo, err := endFixture.ExecVersion(ctx) + require.NoError(t, err) + if startVersionInfo.Binary.String() == endVersionInfo.Binary.String() && startVersionInfo.Binary.Commit == endVersionInfo.Binary.Commit { + t.Skipf("Build under test is the same as the build from the artifacts repository (version: %s) [commit: %s]", startVersionInfo.Binary.String(), startVersionInfo.Binary.Commit) + } + + t.Logf("Testing Elastic Agent upgrade from %s to %s with Fleet...", define.Version(), endVersionInfo.Binary.String()) + + testUpgradeFleetManagedElasticAgent(ctx, t, info, startFixture, endFixture) +} + +func testUpgradeFleetManagedElasticAgent(ctx context.Context, t *testing.T, info *define.Info, startFixture *atesting.Fixture, endFixture *atesting.Fixture) { + startVersionInfo, err := startFixture.ExecVersion(ctx) + require.NoError(t, err) + startParsedVersion, err := version.ParseVersion(startVersionInfo.Binary.String()) + require.NoError(t, err) + endVersionInfo, err := endFixture.ExecVersion(ctx) + require.NoError(t, err) + + kibClient := info.KibanaClient + policyUUID := uuid.New().String() + + t.Log("Creating Agent policy...") + createPolicyReq := kibana.AgentPolicy{ + Name: "test-policy-" + policyUUID, + Namespace: "default", + Description: "Test policy " + policyUUID, + MonitoringEnabled: []kibana.MonitoringEnabledOption{ + kibana.MonitoringEnabledLogs, + kibana.MonitoringEnabledMetrics, + }, + } + policy, err := kibClient.CreatePolicy(ctx, createPolicyReq) + require.NoError(t, err) + + t.Log("Creating Agent enrollment API key...") + createEnrollmentApiKeyReq := kibana.CreateEnrollmentAPIKeyRequest{ + PolicyID: policy.ID, + } + enrollmentToken, 
err := kibClient.CreateEnrollmentAPIKey(ctx, createEnrollmentApiKeyReq) + require.NoError(t, err) + + t.Log("Getting default Fleet Server URL...") + fleetServerURL, err := tools.GetDefaultFleetServerURL(kibClient) + require.NoError(t, err) + + t.Log("Enrolling Elastic Agent...") + var nonInteractiveFlag bool + if upgradetest.Version_8_2_0.Less(*startParsedVersion) { + nonInteractiveFlag = true + } + installOpts := atesting.InstallOpts{ + NonInteractive: nonInteractiveFlag, + Force: true, + EnrollOpts: atesting.EnrollOpts{ + URL: fleetServerURL, + EnrollmentToken: enrollmentToken.APIKey, + }, + } + output, err := startFixture.Install(ctx, &installOpts) + require.NoError(t, err, "failed to install start agent [output: %s]", string(output)) + t.Cleanup(func() { + t.Log("Un-enrolling Elastic Agent...") + assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient, policy.ID)) + }) + + t.Log("Waiting for Agent to be correct version and healthy...") + err = upgradetest.WaitHealthyAndVersion(ctx, startFixture, startVersionInfo.Binary, 2*time.Minute, 10*time.Second, t) + require.NoError(t, err) + + t.Log("Waiting for enrolled Agent status to be online...") + require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, policy.ID, "online"), 2*time.Minute, 10*time.Second, "Agent status is not online") + + t.Logf("Upgrading from version %q to version %q...", startParsedVersion, endVersionInfo.Binary.String()) + err = tools.UpgradeAgent(kibClient, policy.ID, endVersionInfo.Binary.String(), true) + require.NoError(t, err) + + // wait for the watcher to show up + t.Logf("Waiting for upgrade watcher to start...") + err = upgradetest.WaitForWatcher(ctx, 2*time.Minute, 10*time.Second) + require.NoError(t, err) + t.Logf("Upgrade watcher started") + + // wait for the agent to be healthy and correct version + err = upgradetest.WaitHealthyAndVersion(ctx, startFixture, endVersionInfo.Binary, 2*time.Minute, 10*time.Second, t) + require.NoError(t, err) + + t.Log("Waiting for enrolled Agent 
status to be online...") + require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, policy.ID, "online"), 10*time.Minute, 15*time.Second, "Agent status is not online") + + // wait for version + require.Eventually(t, func() bool { + t.Log("Getting Agent version...") + newVersion, err := tools.GetAgentVersion(kibClient, policy.ID) + if err != nil { + t.Logf("error getting agent version: %v", err) + return false + } + return endVersionInfo.Binary.Version == newVersion + }, 5*time.Minute, time.Second) + + t.Logf("Waiting for upgrade watcher to finish...") + err = upgradetest.WaitForNoWatcher(ctx, 2*time.Minute, 10*time.Second, 1*time.Minute+15*time.Second) + require.NoError(t, err) + t.Logf("Upgrade watcher finished") + + // now that the watcher has stopped lets ensure that it's still the expected + // version, otherwise it's possible that it was rolled back to the original version + err = upgradetest.CheckHealthyAndVersion(ctx, startFixture, endVersionInfo.Binary) + assert.NoError(t, err) +} diff --git a/testing/integration/upgrade_gpg_test.go b/testing/integration/upgrade_gpg_test.go new file mode 100644 index 00000000000..56d4378d147 --- /dev/null +++ b/testing/integration/upgrade_gpg_test.go @@ -0,0 +1,132 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +//go:build integration + +package integration + +import ( + "context" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent/internal/pkg/release" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/pkg/version" + "github.com/elastic/elastic-agent/testing/upgradetest" +) + +func TestStandaloneUpgradeWithGPGFallback(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + minVersion := upgradetest.Version_8_10_0_SNAPSHOT + fromVersion, err := version.ParseVersion(define.Version()) + require.NoError(t, err) + + if fromVersion.Less(*minVersion) { + t.Skipf("Version %s is lower than min version %s", define.Version(), minVersion) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Start at the build version as we want to test the retry + // logic that is in the build. + startFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + + // Upgrade to an old build. 
+ upgradeToVersion, err := upgradetest.PreviousMinor(ctx, define.Version()) + require.NoError(t, err) + endFixture, err := atesting.NewFixture( + t, + upgradeToVersion, + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err) + + t.Logf("Testing Elastic Agent upgrade from %s to %s...", define.Version(), upgradeToVersion) + + _, defaultPGP := release.PGP() + firstSeven := string(defaultPGP[:7]) + newPgp := strings.Replace( + string(defaultPGP), + firstSeven, + "abcDEFg", + 1, + ) + + customPGP := upgradetest.CustomPGP{ + PGP: newPgp, + } + + err = upgradetest.PerformUpgrade( + ctx, startFixture, endFixture, t, + upgradetest.WithSourceURI(""), + upgradetest.WithCustomPGP(customPGP), + upgradetest.WithSkipVerify(false)) + assert.NoError(t, err) +} + +func TestStandaloneUpgradeWithGPGFallbackOneRemoteFailing(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + minVersion := upgradetest.Version_8_10_0_SNAPSHOT + fromVersion, err := version.ParseVersion(define.Version()) + require.NoError(t, err) + + if fromVersion.Less(*minVersion) { + t.Skipf("Version %s is lower than min version %s", define.Version(), minVersion) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Start at the build version as we want to test the retry + // logic that is in the build. + startFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + + // Upgrade to an old build. 
+ upgradeToVersion, err := upgradetest.PreviousMinor(ctx, define.Version()) + require.NoError(t, err) + endFixture, err := atesting.NewFixture( + t, + upgradeToVersion, + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err) + + t.Logf("Testing Elastic Agent upgrade from %s to %s...", define.Version(), upgradeToVersion) + + _, defaultPGP := release.PGP() + firstSeven := string(defaultPGP[:7]) + newPgp := strings.Replace( + string(defaultPGP), + firstSeven, + "abcDEFg", + 1, + ) + + customPGP := upgradetest.CustomPGP{ + PGP: newPgp, + PGPUri: "https://127.0.0.1:3456/non/existing/path", + } + + err = upgradetest.PerformUpgrade( + ctx, startFixture, endFixture, t, + upgradetest.WithSourceURI(""), + upgradetest.WithCustomPGP(customPGP), + upgradetest.WithSkipVerify(false)) +} diff --git a/testing/integration/upgrade_rollback_test.go b/testing/integration/upgrade_rollback_test.go new file mode 100644 index 00000000000..fa9477de585 --- /dev/null +++ b/testing/integration/upgrade_rollback_test.go @@ -0,0 +1,196 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build integration + +package integration + +import ( + "context" + "errors" + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" + "github.com/elastic/elastic-agent/internal/pkg/agent/install" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/testing/upgradetest" +) + +// TestStandaloneUpgradeRollback tests the scenario where upgrading to a new version +// of Agent fails due to the new Agent binary reporting an unhealthy status. 
It checks +// that the Agent is rolled back to the previous version. +func TestStandaloneUpgradeRollback(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Start at the build version as we want to test the retry + // logic that is in the build. + startFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + startVersionInfo, err := startFixture.ExecVersion(ctx) + require.NoError(t, err, "failed to get start agent build version info") + + // Upgrade to an old build. + upgradeToVersion, err := upgradetest.PreviousMinor(ctx, define.Version()) + require.NoError(t, err) + endFixture, err := atesting.NewFixture( + t, + upgradeToVersion, + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err) + + t.Logf("Testing Elastic Agent upgrade from %s to %s...", define.Version(), upgradeToVersion) + + // Configure Agent with fast watcher configuration and also an invalid + // input when the Agent version matches the upgraded Agent version. This way + // the pre-upgrade version of the Agent runs healthy, but the post-upgrade + // version doesn't. + preInstallHook := func() error { + invalidInputPolicy := upgradetest.FastWatcherCfg + fmt.Sprintf(` +outputs: + default: + type: elasticsearch + hosts: [127.0.0.1:9200] + +inputs: + - condition: '${agent.version.version} == "%s"' + type: invalid + id: invalid-input +`, upgradeToVersion) + return startFixture.Configure(ctx, []byte(invalidInputPolicy)) + } + + // Use the post-upgrade hook to bypass the remainder of the PerformUpgrade + // because we want to do our own checks for the rollback. 
+ var ErrPostExit = errors.New("post exit") + postUpgradeHook := func() error { + return ErrPostExit + } + + err = upgradetest.PerformUpgrade( + ctx, startFixture, endFixture, t, + upgradetest.WithPreInstallHook(preInstallHook), + upgradetest.WithPostUpgradeHook(postUpgradeHook)) + if !errors.Is(err, ErrPostExit) { + require.NoError(t, err) + } + + // rollback should now occur + + // wait for the agent to be healthy and back at the start version + err = upgradetest.WaitHealthyAndVersion(ctx, startFixture, startVersionInfo.Binary, 10*time.Minute, 10*time.Second, t) + if err != nil { + // agent never got healthy, but we need to ensure the watcher is stopped before continuing (this + // prevents this test failure from interfering with another test) + // this kills the watcher instantly and waits for it to be gone before continuing + watcherErr := upgradetest.WaitForNoWatcher(ctx, 1*time.Minute, time.Second, 100*time.Millisecond) + if watcherErr != nil { + t.Logf("failed to kill watcher due to agent not becoming healthy: %s", watcherErr) + } + } + require.NoError(t, err) + + // rollback should stop the watcher + // killTimeout is greater than timeout as the watcher should have been + // stopped on its own, and we don't want this test to hide that fact + err = upgradetest.WaitForNoWatcher(ctx, 2*time.Minute, 10*time.Second, 3*time.Minute) + require.NoError(t, err) + + // now that the watcher has stopped lets ensure that it's still the expected + // version, otherwise it's possible that it was rolled back to the original version + err = upgradetest.CheckHealthyAndVersion(ctx, startFixture, startVersionInfo.Binary) + assert.NoError(t, err) +} + +// TestStandaloneUpgradeRollbackOnRestarts tests the scenario where upgrading to a new version +// of Agent fails due to the new Agent binary not starting up. It checks that the Agent is +// rolled back to the previous version. 
+func TestStandaloneUpgradeRollbackOnRestarts(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Start at the build version as we want to test the retry + // logic that is in the build. + startFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + startVersionInfo, err := startFixture.ExecVersion(ctx) + require.NoError(t, err, "failed to get start agent build version info") + + // Upgrade to an old build. + upgradeToVersion, err := upgradetest.PreviousMinor(ctx, define.Version()) + require.NoError(t, err) + endFixture, err := atesting.NewFixture( + t, + upgradeToVersion, + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err) + + t.Logf("Testing Elastic Agent upgrade from %s to %s...", define.Version(), upgradeToVersion) + + // Use the post-upgrade hook to bypass the remainder of the PerformUpgrade + // because we want to do our own checks for the rollback. + var ErrPostExit = errors.New("post exit") + postUpgradeHook := func() error { + return ErrPostExit + } + + err = upgradetest.PerformUpgrade( + ctx, startFixture, endFixture, t, + upgradetest.WithPostUpgradeHook(postUpgradeHook)) + if !errors.Is(err, ErrPostExit) { + require.NoError(t, err) + } + + // A few seconds after the upgrade, deliberately restart upgraded Agent a + // couple of times to simulate Agent crashing. 
+ for restartIdx := 0; restartIdx < 3; restartIdx++ { + time.Sleep(10 * time.Second) + topPath := paths.Top() + + t.Logf("Restarting Agent via service to simulate crashing") + err = install.RestartService(topPath) + require.NoError(t, err) + } + + // wait for the agent to be healthy and back at the start version + err = upgradetest.WaitHealthyAndVersion(ctx, startFixture, startVersionInfo.Binary, 10*time.Minute, 10*time.Second, t) + if err != nil { + // agent never got healthy, but we need to ensure the watcher is stopped before continuing + // this kills the watcher instantly and waits for it to be gone before continuing + watcherErr := upgradetest.WaitForNoWatcher(ctx, 1*time.Minute, time.Second, 100*time.Millisecond) + if watcherErr != nil { + t.Logf("failed to kill watcher due to agent not becoming healthy: %s", watcherErr) + } + } + require.NoError(t, err) + + // rollback should stop the watcher + // killTimeout is greater than timeout as the watcher should have been + // stopped on its own, and we don't want this test to hide that fact + err = upgradetest.WaitForNoWatcher(ctx, 2*time.Minute, 10*time.Second, 3*time.Minute) + require.NoError(t, err) + + // now that the watcher has stopped lets ensure that it's still the expected + // version, otherwise it's possible that it was rolled back to the original version + err = upgradetest.CheckHealthyAndVersion(ctx, startFixture, startVersionInfo.Binary) + assert.NoError(t, err) +} diff --git a/testing/integration/upgrade_standalone_inprogress.go b/testing/integration/upgrade_standalone_inprogress.go new file mode 100644 index 00000000000..adc52cf0872 --- /dev/null +++ b/testing/integration/upgrade_standalone_inprogress.go @@ -0,0 +1,98 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package integration + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/testing/upgradetest" +) + +// TestStandaloneUpgradeFailsWhenUpgradeIsInProgress initiates an upgrade for a +// standalone Elastic Agent and, while that upgrade is still in progress, attempts +// to initiate a second upgrade. The test expects Elastic Agent to not allow +// the second upgrade. +func TestStandaloneUpgradeFailsWhenUpgradeIsInProgress(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // For this test we start with a version of Agent that's two minors older + // than the current version and upgrade to the current version. Then we attempt + // upgrading to the current version again, expecting Elastic Agent to disallow + // this second upgrade. + upgradeFromVersion, err := upgradetest.PreviousMinor(ctx, define.Version()) + require.NoError(t, err) + startFixture, err := atesting.NewFixture( + t, + upgradeFromVersion, + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err, "error creating previous agent fixture") + + endFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + endVersionInfo, err := endFixture.ExecVersion(ctx) + require.NoError(t, err) + + // Use the post-upgrade hook to bypass the remainder of the PerformUpgrade + // because we want to do our own checks for the rollback. 
+ var ErrPostExit = errors.New("post exit") + postUpgradeHook := func() error { + return ErrPostExit + } + + err = upgradetest.PerformUpgrade( + ctx, startFixture, endFixture, t, + upgradetest.WithPostUpgradeHook(postUpgradeHook)) + if !errors.Is(err, ErrPostExit) { + require.NoError(t, err) + } + + // upgrade has occurred and the watcher is running + + // wait for the agent to be healthy and correct version + // before trying to perform another upgrade + err = upgradetest.WaitHealthyAndVersion(ctx, startFixture, endVersionInfo.Binary, 2*time.Minute, 10*time.Second, t) + if err != nil { + // agent never got healthy, but we need to ensure the watcher is stopped before continuing (this + // prevents this test failure from interfering with another test) + // this kills the watcher instantly and waits for it to be gone before continuing + watcherErr := upgradetest.WaitForNoWatcher(ctx, 1*time.Minute, time.Second, 100*time.Millisecond) + if watcherErr != nil { + t.Logf("failed to kill watcher due to agent not becoming healthy: %s", watcherErr) + } + } + require.NoError(t, err) + + // trigger another upgrade that should fail because the current watcher + // is still running and another upgrade can be performed until it is complete + upgradeCmdArgs := []string{"upgrade", endVersionInfo.Binary.String()} + upgradeOutput, err := startFixture.Exec(ctx, upgradeCmdArgs) + require.NotNil(t, err) + require.Contains(t, string(upgradeOutput), "an upgrade is already in progress; please try again later.") + + // killTimeout is greater than timeout as the watcher should have been + // stopped on its own, and we don't want this test to hide that fact + err = upgradetest.WaitForNoWatcher(ctx, 2*time.Minute, 10*time.Second, 3*time.Minute) + require.NoError(t, err) + + // now that the watcher has stopped lets ensure that it's still the expected + // version, otherwise it's possible that it was rolled back to the original version + err = upgradetest.CheckHealthyAndVersion(ctx, 
startFixture, endVersionInfo.Binary) + assert.NoError(t, err) +} diff --git a/testing/integration/upgrade_standalone_retry_test.go b/testing/integration/upgrade_standalone_retry_test.go new file mode 100644 index 00000000000..00e74b4cef0 --- /dev/null +++ b/testing/integration/upgrade_standalone_retry_test.go @@ -0,0 +1,96 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build integration + +package integration + +import ( + "context" + "fmt" + "net" + "net/http" + "path" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/testing/upgradetest" +) + +func TestStandaloneUpgradeRetryDownload(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Start at the build version as we want to test the retry + // logic that is in the build. + startFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + + // Upgrade to an old build. 
+ upgradeToVersion, err := upgradetest.PreviousMinor(ctx, define.Version()) + require.NoError(t, err) + endFixture, err := atesting.NewFixture( + t, + upgradeToVersion, + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err) + + t.Logf("Testing Elastic Agent upgrade from %s to %s...", define.Version(), upgradeToVersion) + + // uses an internal http server that returns bad requests + // until it returns a successful request + srcPackage, err := endFixture.SrcPackage(ctx) + require.NoError(t, err) + + l, err := net.Listen("tcp", ":0") + require.NoError(t, err) + defer l.Close() + port := l.Addr().(*net.TCPAddr).Port + + count := 0 + fs := http.FileServer(http.Dir(filepath.Dir(srcPackage))) + handler := http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) { + + // fix path to remove '/beats/elastic-agent/' prefix + upath := r.URL.Path + if !strings.HasPrefix(upath, "/") { + upath = "/" + upath + } + if strings.HasPrefix(upath, "/beats/elastic-agent/") { + upath = strings.TrimPrefix(upath, "/beats/elastic-agent/") + } + r.URL.Path = upath + + if path.Base(r.URL.Path) == filepath.Base(srcPackage) && count < 2 { + // first 2 requests return 404 + count += 1 + t.Logf("request #%d; returning not found", count) + rw.WriteHeader(http.StatusNotFound) + return + } + + fs.ServeHTTP(rw, r) + }) + + go func() { + _ = http.Serve(l, handler) + }() + + sourceURI := fmt.Sprintf("http://localhost:%d", port) + err = upgradetest.PerformUpgrade(ctx, startFixture, endFixture, t, upgradetest.WithSourceURI(sourceURI)) + assert.NoError(t, err) + assert.Equal(t, 2, count, "retry request didn't occur") +} diff --git a/testing/integration/upgrade_standalone_test.go b/testing/integration/upgrade_standalone_test.go new file mode 100644 index 00000000000..f1d97bd12b5 --- /dev/null +++ b/testing/integration/upgrade_standalone_test.go @@ -0,0 +1,51 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +//go:build integration + +package integration + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/testing/upgradetest" +) + +func TestStandaloneUpgrade(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // test 2 current 8.x version and 1 previous 7.x version + versionList, err := upgradetest.GetUpgradableVersions(ctx, define.Version(), 2, 1) + require.NoError(t, err) + + for _, startVersion := range versionList { + t.Run(fmt.Sprintf("Upgrade %s to %s", startVersion, define.Version()), func(t *testing.T) { + startFixture, err := atesting.NewFixture( + t, + startVersion.String(), + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err, "error creating previous agent fixture") + + endFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + + err = upgradetest.PerformUpgrade(ctx, startFixture, endFixture, t) + assert.NoError(t, err) + }) + } +} diff --git a/testing/integration/upgrade_test.go b/testing/integration/upgrade_test.go deleted file mode 100644 index 4f0c6300912..00000000000 --- a/testing/integration/upgrade_test.go +++ /dev/null @@ -1,1095 +0,0 @@ -// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -// or more contributor license agreements. Licensed under the Elastic License; -// you may not use this file except in compliance with the Elastic License. 
- -//go:build integration - -package integration - -import ( - "context" - "encoding/json" - "fmt" - "io/fs" - "net/http" - "os" - "os/exec" - "path/filepath" - "runtime" - "sort" - "strings" - "sync" - "testing" - "time" - - "gopkg.in/yaml.v2" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - - "github.com/google/uuid" - - "github.com/elastic/elastic-agent-libs/kibana" - - "github.com/elastic/elastic-agent/internal/pkg/agent/application/paths" - cmdVersion "github.com/elastic/elastic-agent/internal/pkg/basecmd/version" - "github.com/elastic/elastic-agent/internal/pkg/release" - v1client "github.com/elastic/elastic-agent/pkg/control/v1/client" - v2client "github.com/elastic/elastic-agent/pkg/control/v2/client" - v2proto "github.com/elastic/elastic-agent/pkg/control/v2/cproto" - atesting "github.com/elastic/elastic-agent/pkg/testing" - "github.com/elastic/elastic-agent/pkg/testing/define" - "github.com/elastic/elastic-agent/pkg/testing/tools" - "github.com/elastic/elastic-agent/pkg/version" - agtversion "github.com/elastic/elastic-agent/version" -) - -// The watcher will need the default 10 minutes to complete for a Fleet managed agent, see https://github.com/elastic/elastic-agent/issues/2977. -const defaultWatcherDuration = 10 * time.Minute - -// Configure standalone agents to complete faster to speed up tests. 
-const standaloneWatcherDuration = time.Minute - -// Note: this configuration can't apply to Fleet managed upgrades until https://github.com/elastic/elastic-agent/issues/2977 is resolved -var fastWatcherCfg = fmt.Sprintf(` -agent.upgrade.watcher: - grace_period: %s - error_check.interval: 15s - crash_check.interval: 15s -`, standaloneWatcherDuration) - -// notable versions used in tests - -// first version to include --non-interactive flag during install -var version_8_2_0 = version.NewParsedSemVer(8, 2, 0, "", "") - -// first version to use agent v2 protocol -var version_8_6_0 = version.NewParsedSemVer(8, 6, 0, "", "") - -// minimum version for passing --skip-verify when upgrading -var version_8_7_0 = version.NewParsedSemVer(8, 7, 0, "", "") - -// minimum version for upgrade to specific snapshot + minimum version for setting shorter watch period after upgrade -var version_8_9_0_SNAPSHOT = version.NewParsedSemVer(8, 9, 0, "SNAPSHOT", "") - -// minimum version for upgrade with remote pgp and skipping default pgp verification -var version_8_10_0_SNAPSHOT = version.NewParsedSemVer(8, 10, 0, "SNAPSHOT", "") - -func TestFleetManagedUpgrade(t *testing.T) { - info := define.Require(t, define.Requirements{ - Stack: &define.Stack{}, - Local: false, // requires Agent installation - Isolate: false, - Sudo: true, // requires Agent installation - }) - - ctx, cancel := context.WithCancel(context.TODO()) - defer cancel() - - upgradableVersions := getUpgradableVersions(ctx, t, define.Version()) - - for _, parsedVersion := range upgradableVersions { - - t.Run(fmt.Sprintf("Upgrade managed agent from %s to %s", parsedVersion, define.Version()), func(t *testing.T) { - agentFixture, err := atesting.NewFixture( - t, - parsedVersion.String(), - atesting.WithFetcher(atesting.ArtifactFetcher()), - ) - require.NoError(t, err) - err = agentFixture.Prepare(ctx) - require.NoError(t, err, "error preparing agent fixture") - - t.Cleanup(func() { - // The watcher needs to finish before the agent 
is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 - waitForUpgradeWatcherToComplete(t, agentFixture, parsedVersion, defaultWatcherDuration) - }) - - testUpgradeFleetManagedElasticAgent(t, ctx, info, agentFixture, parsedVersion, define.Version()) - }) - } -} - -func testUpgradeFleetManagedElasticAgent(t *testing.T, ctx context.Context, info *define.Info, agentFixture *atesting.Fixture, parsedFromVersion *version.ParsedSemVer, toVersion string) { - kibClient := info.KibanaClient - policyUUID := uuid.New().String() - - t.Log("Creating Agent policy...") - createPolicyReq := kibana.AgentPolicy{ - Name: "test-policy-" + policyUUID, - Namespace: "default", - Description: "Test policy " + policyUUID, - MonitoringEnabled: []kibana.MonitoringEnabledOption{ - kibana.MonitoringEnabledLogs, - kibana.MonitoringEnabledMetrics, - }, - } - policy, err := kibClient.CreatePolicy(ctx, createPolicyReq) - require.NoError(t, err) - - t.Log("Creating Agent enrollment API key...") - createEnrollmentApiKeyReq := kibana.CreateEnrollmentAPIKeyRequest{ - PolicyID: policy.ID, - } - enrollmentToken, err := kibClient.CreateEnrollmentAPIKey(ctx, createEnrollmentApiKeyReq) - require.NoError(t, err) - - t.Log("Getting default Fleet Server URL...") - fleetServerURL, err := tools.GetDefaultFleetServerURL(kibClient) - require.NoError(t, err) - - t.Log("Enrolling Elastic Agent...") - var nonInteractiveFlag bool - if version_8_2_0.Less(*parsedFromVersion) { - nonInteractiveFlag = true - } - installOpts := atesting.InstallOpts{ - NonInteractive: nonInteractiveFlag, - Force: true, - EnrollOpts: atesting.EnrollOpts{ - URL: fleetServerURL, - EnrollmentToken: enrollmentToken.APIKey, - }, - } - output, err := tools.InstallAgent(installOpts, agentFixture) - if err != nil { - t.Log(string(output)) - } - require.NoError(t, err) - t.Cleanup(func() { - t.Log("Un-enrolling Elastic Agent...") - assert.NoError(t, tools.UnEnrollAgent(info.KibanaClient, policy.ID)) - }) - - t.Log(`Waiting for enrolled 
Agent status to be "online"...`) - require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, policy.ID, "online"), 2*time.Minute, 10*time.Second, "Agent status is not online") - - t.Logf("Upgrade Elastic Agent to version %s...", toVersion) - err = tools.UpgradeAgent(kibClient, policy.ID, toVersion) - require.NoError(t, err) - - t.Log(`Waiting for enrolled Agent status to be "online"...`) - require.Eventually(t, tools.WaitForAgentStatus(t, kibClient, policy.ID, "online"), 10*time.Minute, 15*time.Second, "Agent status is not online") - - // We remove the `-SNAPSHOT` suffix because, post-upgrade, the version reported - // by the Agent will not contain this suffix, even if a `-SNAPSHOT`-suffixed - // version was used as the target version for the upgrade. - require.Eventually(t, func() bool { - t.Log("Getting Agent version...") - newVersion, err := tools.GetAgentVersion(kibClient, policy.ID) - if err != nil { - t.Logf("error getting agent version: %v", err) - return false - } - return strings.TrimRight(toVersion, `-SNAPSHOT`) == newVersion - }, 5*time.Minute, time.Second) -} - -func TestStandaloneUpgrade(t *testing.T) { - define.Require(t, define.Requirements{ - Local: false, // requires Agent installation - Isolate: true, - Sudo: true, // requires Agent installation - }) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - versionList := getUpgradableVersions(ctx, t, define.Version()) - - for _, parsedVersion := range versionList { - - t.Run(fmt.Sprintf("Upgrade %s to %s", parsedVersion, define.Version()), func(t *testing.T) { - agentFixture, err := atesting.NewFixture( - t, - parsedVersion.String(), - atesting.WithFetcher(atesting.ArtifactFetcher()), - ) - - require.NoError(t, err, "error creating fixture") - - err = agentFixture.Prepare(ctx) - require.NoError(t, err, "error preparing agent fixture") - - err = agentFixture.Configure(ctx, []byte(fastWatcherCfg)) - require.NoError(t, err, "error configuring agent fixture") - - 
t.Cleanup(func() { - // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 - waitForUpgradeWatcherToComplete(t, agentFixture, parsedVersion, standaloneWatcherDuration) - }) - - parsedUpgradeVersion, err := version.ParseVersion(define.Version()) - require.NoErrorf(t, err, "define.Version() %q cannot be parsed as agent version", define.Version()) - skipVerify := version_8_7_0.Less(*parsedVersion) - testStandaloneUpgrade(ctx, t, agentFixture, parsedVersion, parsedUpgradeVersion, "", skipVerify, true, false, CustomPGP{}) - }) - } -} - -func TestStandaloneUpgradeWithGPGFallback(t *testing.T) { - define.Require(t, define.Requirements{ - Local: false, // requires Agent installation - Sudo: true, // requires Agent installation - }) - - t.Skip("Fails upgrading to a version that doesn't exist: https://github.com/elastic/elastic-agent/issues/3397") - - minVersion := version_8_10_0_SNAPSHOT - fromVersion, err := version.ParseVersion(define.Version()) - require.NoError(t, err) - - if fromVersion.Less(*minVersion) { - t.Skipf("Version %s is lower than min version %s", define.Version(), minVersion) - } - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // previous - toVersion, err := fromVersion.GetPreviousMinor() - require.NoError(t, err, "failed to get previous minor") - agentFixture, err := define.NewFixture( - t, - define.Version(), - ) - require.NoError(t, err, "error creating fixture") - - err = agentFixture.Prepare(ctx) - require.NoError(t, err, "error preparing agent fixture") - - err = agentFixture.Configure(ctx, []byte(fastWatcherCfg)) - require.NoError(t, err, "error configuring agent fixture") - - t.Cleanup(func() { - // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 - waitForUpgradeWatcherToComplete(t, agentFixture, fromVersion, standaloneWatcherDuration) - }) - - _, defaultPGP := release.PGP() - firstSeven := 
string(defaultPGP[:7]) - newPgp := strings.Replace( - string(defaultPGP), - firstSeven, - "abcDEFg", - 1, - ) - - customPGP := CustomPGP{ - PGP: newPgp, - } - - testStandaloneUpgrade(ctx, t, agentFixture, fromVersion, toVersion, "", false, false, true, customPGP) -} - -func TestStandaloneUpgradeWithGPGFallbackOneRemoteFailing(t *testing.T) { - define.Require(t, define.Requirements{ - Local: false, // requires Agent installation - Sudo: true, // requires Agent installation - }) - - t.Skip("Fails upgrading to a version that doesn't exist: https://github.com/elastic/elastic-agent/issues/3397") - - minVersion := version_8_10_0_SNAPSHOT - fromVersion, err := version.ParseVersion(define.Version()) - require.NoError(t, err) - - if fromVersion.Less(*minVersion) { - t.Skipf("Version %s is lower than min version %s", define.Version(), minVersion) - } - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // previous - toVersion, err := fromVersion.GetPreviousMinor() - require.NoError(t, err, "failed to get previous minor") - agentFixture, err := define.NewFixture( - t, - define.Version(), - ) - require.NoError(t, err, "error creating fixture") - - err = agentFixture.Prepare(ctx) - require.NoError(t, err, "error preparing agent fixture") - - err = agentFixture.Configure(ctx, []byte(fastWatcherCfg)) - require.NoError(t, err, "error configuring agent fixture") - - t.Cleanup(func() { - // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 - waitForUpgradeWatcherToComplete(t, agentFixture, fromVersion, standaloneWatcherDuration) - }) - - _, defaultPGP := release.PGP() - firstSeven := string(defaultPGP[:7]) - newPgp := strings.Replace( - string(defaultPGP), - firstSeven, - "abcDEFg", - 1, - ) - - customPGP := CustomPGP{ - PGP: newPgp, - PGPUri: "https://127.0.0.1:3456/non/existing/path", - } - - testStandaloneUpgrade(ctx, t, agentFixture, fromVersion, toVersion, "", false, false, true, 
customPGP) -} - -func TestStandaloneDowngradeToPreviousSnapshotBuild(t *testing.T) { - define.Require(t, define.Requirements{ - Local: false, // requires Agent installation - Sudo: true, // requires Agent installation - }) - - minVersion := version_8_9_0_SNAPSHOT - pv, err := version.ParseVersion(define.Version()) - if pv.Less(*minVersion) { - t.Skipf("Version %s is lower than min version %s", define.Version(), minVersion) - } - - agentFixture, err := define.NewFixture(t, define.Version()) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - err = agentFixture.Prepare(ctx) - require.NoError(t, err, "error preparing agent fixture") - - err = agentFixture.Configure(ctx, []byte(fastWatcherCfg)) - require.NoError(t, err, "error configuring agent fixture") - - // retrieve all the versions of agent from the artifact API - aac := tools.NewArtifactAPIClient() - latestSnapshotVersion, err := tools.GetLatestSnapshotVersion(ctx, t, aac) - require.NoError(t, err) - - // get all the builds of the snapshot version (need to pass x.y.z-SNAPSHOT format) - builds, err := aac.GetBuildsForVersion(ctx, latestSnapshotVersion.VersionWithPrerelease()) - require.NoError(t, err) - - if len(builds.Builds) < 2 { - t.Skip("there is only one SNAPSHOT version available, " + - "the test requires at least 2 so it can downgrade to the previous" + - "SNAPSHOT") - } - upgradeVersionString := builds.Builds[1] - - t.Logf("Targeting build %q of version %q", upgradeVersionString, latestSnapshotVersion) - - buildDetails, err := aac.GetBuildDetails(ctx, latestSnapshotVersion.VersionWithPrerelease(), upgradeVersionString) - require.NoErrorf(t, err, "error accessing build details for version %q and buildID %q", latestSnapshotVersion.Original(), upgradeVersionString) - require.NotNil(t, buildDetails) - agentProject, ok := buildDetails.Build.Projects["elastic-agent"] - require.Truef(t, ok, "elastic agent project not found in version %q build %q", 
latestSnapshotVersion.Original(), upgradeVersionString) - t.Logf("agent build details: %+v", agentProject) - t.Logf("expected agent commit hash: %q", agentProject.CommitHash) - expectedAgentHashAfterUpgrade := agentProject.CommitHash - - // Workaround until issue with Artifact API build commit hash are resolved - actualAgentHashAfterUpgrade := extractCommitHashFromArtifact(t, ctx, latestSnapshotVersion, agentProject) - require.NotEmpty(t, actualAgentHashAfterUpgrade) - - t.Logf("Artifact API hash: %q Actual package hash: %q", expectedAgentHashAfterUpgrade, actualAgentHashAfterUpgrade) - - // override the expected hash with the one extracted from the actual artifact - expectedAgentHashAfterUpgrade = actualAgentHashAfterUpgrade - - buildFragments := strings.Split(upgradeVersionString, "-") - require.Lenf(t, buildFragments, 2, "version %q returned by artifact api is not in format -", upgradeVersionString) - - upgradeInputVersion := version.NewParsedSemVer( - latestSnapshotVersion.Major(), - latestSnapshotVersion.Minor(), - latestSnapshotVersion.Patch(), - latestSnapshotVersion.Prerelease(), - buildFragments[1], - ) - - t.Logf("Targeting upgrade to version %+v", upgradeInputVersion) - parsedFromVersion, err := version.ParseVersion(define.Version()) - - t.Cleanup(func() { - // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 - waitForUpgradeWatcherToComplete(t, agentFixture, parsedFromVersion, standaloneWatcherDuration) - }) - - require.NoErrorf(t, err, "define.Version() %q cannot be parsed as agent version", define.Version()) - testStandaloneUpgrade(ctx, t, agentFixture, parsedFromVersion, upgradeInputVersion, expectedAgentHashAfterUpgrade, false, true, false, CustomPGP{}) -} - -func getUpgradableVersions(ctx context.Context, t *testing.T, upgradeToVersion string) (upgradableVersions []*version.ParsedSemVer) { - t.Helper() - - const currentMajorVersions = 2 - const previousMajorVersions = 1 - - aac := 
tools.NewArtifactAPIClient() - vList, err := aac.GetVersions(ctx) - require.NoError(t, err, "error retrieving versions from Artifact API") - require.NotEmpty(t, vList) - - parsedUpgradeToVersion, err := version.ParseVersion(upgradeToVersion) - require.NoErrorf(t, err, "upgradeToVersion %q is not a valid version string", upgradeToVersion) - currentMajor := parsedUpgradeToVersion.Major() - var currentMajorSelected, previousMajorSelected int - - sortedParsedVersions := make(version.SortableParsedVersions, 0, len(vList.Versions)) - for _, v := range vList.Versions { - pv, err := version.ParseVersion(v) - require.NoErrorf(t, err, "invalid version retrieved from artifact API: %q", v) - sortedParsedVersions = append(sortedParsedVersions, pv) - } - - require.NotEmpty(t, sortedParsedVersions) - - // normally the output of the versions returned by artifact API is already sorted in ascending order, - // we want to sort in descending orders, so we sort them - sort.Sort(sort.Reverse(sortedParsedVersions)) - - for _, parsedVersion := range sortedParsedVersions { - if currentMajorSelected == currentMajorVersions && previousMajorSelected == previousMajorVersions { - // we got all the versions we need, break the loop - break - } - - if !parsedVersion.Less(*parsedUpgradeToVersion) { - // skip upgrade from newer versions than the one under test - t.Logf("Skipping version %q since it's newer or equal to version after upgrade %q", parsedVersion, parsedUpgradeToVersion) - continue - } - - if parsedVersion.IsSnapshot() { - // skip all snapshots - continue - } - - if parsedVersion.Major() == currentMajor && currentMajorSelected < currentMajorVersions { - upgradableVersions = append(upgradableVersions, parsedVersion) - currentMajorSelected++ - continue - } - - if parsedVersion.Major() < currentMajor && previousMajorSelected < previousMajorVersions { - upgradableVersions = append(upgradableVersions, parsedVersion) - previousMajorSelected++ - continue - } - - } - return -} - -func 
testStandaloneUpgrade( - ctx context.Context, - t *testing.T, - f *atesting.Fixture, - parsedFromVersion *version.ParsedSemVer, - parsedUpgradeVersion *version.ParsedSemVer, - expectedAgentHashAfterUpgrade string, - allowLocalPackage bool, - skipVerify bool, - skipDefaultPgp bool, - customPgp CustomPGP, -) { - - var nonInteractiveFlag bool - if version_8_2_0.Less(*parsedFromVersion) { - nonInteractiveFlag = true - } - installOpts := atesting.InstallOpts{ - NonInteractive: nonInteractiveFlag, - Force: true, - } - - output, err := tools.InstallAgent(installOpts, f) - t.Logf("Agent installation output: %q", string(output)) - require.NoError(t, err) - - c := f.Client() - - err = c.Connect(ctx) - require.NoError(t, err, "error connecting client to agent") - defer c.Disconnect() - - require.Eventually(t, func() bool { - return checkAgentHealthAndVersion(t, ctx, f, parsedFromVersion.CoreVersion(), parsedFromVersion.IsSnapshot(), "") - }, 2*time.Minute, 10*time.Second, "Agent never became healthy") - - t.Logf("Upgrading from version %q to version %q", parsedFromVersion, parsedUpgradeVersion) - - upgradeCmdArgs := []string{"upgrade", parsedUpgradeVersion.String()} - - useLocalPackage := allowLocalPackage && version_8_7_0.Less(*parsedFromVersion) - if useLocalPackage { - // if we are upgrading from a version > 8.7.0 (min version to skip signature verification) we pass : - // - a file:// sourceURI pointing the agent package under test - // - flag --skip-verify to bypass pgp signature verification (we don't produce signatures for PR/main builds) - tof, err := define.NewFixture(t, parsedUpgradeVersion.String()) - require.NoError(t, err) - - srcPkg, err := tof.SrcPackage(ctx) - require.NoError(t, err) - sourceURI := "file://" + filepath.Dir(srcPkg) - t.Logf("setting sourceURI to : %q", sourceURI) - upgradeCmdArgs = append(upgradeCmdArgs, "--source-uri", sourceURI) - } - if useLocalPackage || skipVerify { - upgradeCmdArgs = append(upgradeCmdArgs, "--skip-verify") - } - - if 
skipDefaultPgp { - upgradeCmdArgs = append(upgradeCmdArgs, "--skip-default-pgp") - } - - if len(customPgp.PGP) > 0 { - upgradeCmdArgs = append(upgradeCmdArgs, "--pgp", customPgp.PGP) - } - - if len(customPgp.PGPUri) > 0 { - upgradeCmdArgs = append(upgradeCmdArgs, "--pgp-uri", customPgp.PGPUri) - } - - if len(customPgp.PGPPath) > 0 { - upgradeCmdArgs = append(upgradeCmdArgs, "--pgp-path", customPgp.PGPPath) - } - - upgradeTriggerOutput, err := f.Exec(ctx, upgradeCmdArgs) - require.NoErrorf(t, err, "error triggering agent upgrade to version %q, output:\n%s", - parsedUpgradeVersion, upgradeTriggerOutput) - - require.Eventuallyf(t, func() bool { - return checkAgentHealthAndVersion(t, ctx, f, parsedUpgradeVersion.CoreVersion(), parsedUpgradeVersion.IsSnapshot(), expectedAgentHashAfterUpgrade) - }, 5*time.Minute, 1*time.Second, "agent never upgraded to expected version") - - if expectedAgentHashAfterUpgrade != "" { - aVersion, err := c.Version(ctx) - assert.NoError(t, err, "error checking version after upgrade") - assert.Equal(t, expectedAgentHashAfterUpgrade, aVersion.Commit, "agent commit hash changed after upgrade") - } -} - -func checkAgentHealthAndVersion(t *testing.T, ctx context.Context, f *atesting.Fixture, expectedVersion string, snapshot bool, expectedHash string) bool { - t.Helper() - - parsedExpectedVersion, err := version.ParseVersion(expectedVersion) - require.NoErrorf(t, err, "Expected version %q is not parseable", expectedVersion) - - if parsedExpectedVersion.Less(*version_8_6_0) { - // we have to parse v1 state response - return checkLegacyAgentHealthAndVersion(t, ctx, f, expectedVersion, snapshot, expectedHash) - } - - stateOut, err := f.Exec(ctx, []string{"status", "--output", "yaml"}) - if err != nil { - t.Logf("error getting the agent state: %v", err) - return false - } - - var state v2client.AgentState - err = yaml.Unmarshal(stateOut, &state) - if err != nil { - t.Logf("error unmarshaling the agent state: %v", err) - return false - } - - 
t.Logf("current agent state: %+v", state) - info := state.Info - if expectedHash != "" { - return info.Commit == expectedHash && state.State == v2proto.State_HEALTHY - } - return info.Version == expectedVersion && - info.Snapshot == snapshot && - state.State == v2proto.State_HEALTHY -} - -func checkLegacyAgentHealthAndVersion(t *testing.T, ctx context.Context, f *atesting.Fixture, expectedVersion string, snapshot bool, expectedHash string) bool { - stateOut, err := f.Exec(ctx, []string{"status", "--output", "json"}) - if err != nil { - t.Logf("error getting the agent state: %v", err) - return false - } - - var state v1client.AgentStatus - err = json.Unmarshal(stateOut, &state) - if err != nil { - t.Logf("error unmarshaling the agent state: %v", err) - return false - } - - t.Logf("current agent state: %+v", state) - - versionOut, err := f.Exec(ctx, []string{"version", "--yaml"}) - if err != nil { - t.Logf("error getting the agent version: %v", err) - return false - } - var aVersion cmdVersion.Output - err = yaml.Unmarshal(versionOut, &aVersion) - if err != nil { - t.Logf("error unmarshaling version output: %v", err) - return false - } - t.Logf("current agent version: %+v", aVersion) - if expectedHash != "" { - return aVersion.Daemon.Commit == expectedHash && state.Status == v1client.Healthy - } - return aVersion.Daemon.Version == expectedVersion && - aVersion.Daemon.Snapshot == snapshot && state.Status == v1client.Healthy - -} - -// waitForUpgradeWatcherToComplete asserts that the Upgrade Watcher finished running. 
-func waitForUpgradeWatcherToComplete(t *testing.T, f *atesting.Fixture, fromVersion *version.ParsedSemVer, timeout time.Duration) { - t.Helper() - - if fromVersion.Less(*version_8_9_0_SNAPSHOT) { - t.Logf("Version %q is too old for a quick update marker check", fromVersion) - timeout = defaultWatcherDuration - } - - t.Logf("Waiting %s for upgrade watcher to finish running", timeout) - time.Sleep(timeout) -} - -func extractCommitHashFromArtifact(t *testing.T, ctx context.Context, artifactVersion *version.ParsedSemVer, agentProject tools.Project) string { - tmpDownloadDir := t.TempDir() - - operatingSystem := runtime.GOOS - architecture := runtime.GOARCH - suffix, err := atesting.GetPackageSuffix(operatingSystem, architecture) - require.NoErrorf(t, err, "error determining suffix for OS %q and arch %q", operatingSystem, architecture) - prefix := fmt.Sprintf("elastic-agent-%s", artifactVersion.VersionWithPrerelease()) - pkgName := fmt.Sprintf("%s-%s", prefix, suffix) - require.Containsf(t, agentProject.Packages, pkgName, "Artifact API response does not contain pkg %s", pkgName) - artifactFilePath := filepath.Join(tmpDownloadDir, pkgName) - err = atesting.DownloadPackage(ctx, t, http.DefaultClient, agentProject.Packages[pkgName].URL, artifactFilePath) - require.NoError(t, err, "error downloading package") - err = atesting.ExtractArtifact(t, artifactFilePath, tmpDownloadDir) - require.NoError(t, err, "error extracting artifact") - - matches, err := filepath.Glob(filepath.Join(tmpDownloadDir, "elastic-agent-*", ".build_hash.txt")) - require.NoError(t, err) - require.NotEmpty(t, matches) - - hashFilePath := matches[0] - t.Logf("Accessing hash file %q", hashFilePath) - hashBytes, err := os.ReadFile(hashFilePath) - require.NoError(t, err, "error reading build hash") - return strings.TrimSpace(string(hashBytes)) -} - -type versionInfo struct { - Version string `yaml:"version"` - Commit string `yaml:"commit"` -} - -type versionOutput struct { - Binary versionInfo 
`yaml:"binary"` - Daemon versionInfo `yaml:"daemon"` -} - -func TestStandaloneUpgradeRetryDownload(t *testing.T) { - define.Require(t, define.Requirements{ - Local: false, // requires Agent installation - Isolate: false, - Sudo: true, // requires Agent installation and modifying /etc/hosts - OS: []define.OS{ - {Type: define.Linux}, - {Type: define.Darwin}, - }, // modifying /etc/hosts - }) - - t.Skip("Flaky test: https://github.com/elastic/elastic-agent/issues/3155") - - upgradeFromVersion, err := version.ParseVersion(define.Version()) - require.NoError(t, err) - - // We go back TWO minors because sometimes we are in a situation where - // the current version has been advanced to the next release (e.g. 8.10.0) - // but the version before that (e.g. 8.9.0) hasn't been released yet. - previousVersion, err := upgradeFromVersion.GetPreviousMinor() - require.NoError(t, err) - previousVersion, err = previousVersion.GetPreviousMinor() - require.NoError(t, err) - - // For testing the upgrade we actually perform a downgrade - upgradeToVersion := previousVersion - - t.Logf("Testing Elastic Agent upgrade from %s to %s...", upgradeFromVersion, upgradeToVersion) - - agentFixture, err := define.NewFixture(t, define.Version()) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - err = agentFixture.Prepare(ctx) - require.NoError(t, err, "error preparing agent fixture") - - err = agentFixture.Configure(ctx, []byte(fastWatcherCfg)) - require.NoError(t, err, "error configuring agent fixture") - - t.Cleanup(func() { - // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 - waitForUpgradeWatcherToComplete(t, agentFixture, upgradeFromVersion, standaloneWatcherDuration) - }) - - t.Log("Install the built Agent") - output, err := tools.InstallStandaloneAgent(agentFixture) - t.Log(string(output)) - require.NoError(t, err) - - t.Log("Ensure the correct version is running") - 
currentVersion, err := getVersion(t, ctx, agentFixture) - require.NoError(t, err) - - t.Log("Modify /etc/hosts to simulate transient network error") - cmd := exec.Command("sed", - "-i.bak", - "s/localhost/localhost artifacts.elastic.co artifacts-api.elastic.co/g", - "/etc/hosts", - ) - t.Log("/etc/hosts modify command: ", cmd.String()) - - output, err = cmd.CombinedOutput() - if err != nil { - t.Log(string(output)) - } - require.NoError(t, err) - - // Ensure that /etc/hosts is modified - require.Eventually(t, func() bool { - cmd := exec.Command("grep", - "artifacts", - "/etc/hosts", - ) - t.Log("Check /etc/hosts command: ", cmd.String()) - - // We don't check the error as grep will return non-zero exit code when - // it doesn't find any matches, which could happen the first couple of - // times it searches /etc/hosts. - output, _ := cmd.CombinedOutput() - outputStr := strings.TrimSpace(string(output)) - - return outputStr != "" - }, 10*time.Second, 1*time.Second) - - defer restoreEtcHosts() - - t.Log("Start the Agent upgrade") - toVersion := upgradeToVersion.String() - var wg sync.WaitGroup - go func() { - wg.Add(1) - - err := upgradeAgent(ctx, toVersion, agentFixture, t.Log) - - wg.Done() - require.NoError(t, err) - }() - - t.Log("Check Agent logs for at least two retry messages") - agentDirName := fmt.Sprintf("elastic-agent-%s", release.TrimCommit(currentVersion.Daemon.Commit)) - logsPath := filepath.Join(paths.DefaultBasePath, "Elastic", "Agent", "data", agentDirName, "logs") - require.Eventually(t, func() bool { - cmd := exec.Command("grep", - "download.*retrying", - "--recursive", - "--include", "*.ndjson", - logsPath, - ) - t.Log("Find logs command: ", cmd.String()) - - // We don't check the error as grep will return non-zero exit code when - // it doesn't find any matches, which could happen the first couple of - // times it searches the Elastic Agent logs. 
- output, _ := cmd.CombinedOutput() - outputStr := strings.TrimSpace(string(output)) - - outputLines := strings.Split(outputStr, "\n") - t.Log(outputLines) - t.Log("Number of retry messages: ", len(outputLines)) - return len(outputLines) >= 2 - }, 2*time.Minute, 20*time.Second) - - t.Log("Restore /etc/hosts so upgrade can proceed") - err = restoreEtcHosts() - require.NoError(t, err) - - // Wait for upgrade command to finish executing - t.Log("Waiting for upgrade to finish") - wg.Wait() - - t.Log("Check Agent version to ensure upgrade is successful") - currentVersion, err = getVersion(t, ctx, agentFixture) - require.NoError(t, err) - require.Equal(t, toVersion, currentVersion.Binary.Version) - require.Equal(t, toVersion, currentVersion.Daemon.Version) -} - -func getVersion(t *testing.T, ctx context.Context, agentFixture *atesting.Fixture) (*versionOutput, error) { - var currentVersion versionOutput - var err error - - require.Eventually(t, func() bool { - args := []string{"version", "--yaml"} - var output []byte - output, err = agentFixture.Exec(ctx, args) - if err != nil { - t.Log(string(output)) - return false - } - - err = yaml.Unmarshal(output, ¤tVersion) - return err == nil - }, 1*time.Minute, 1*time.Second) - - return ¤tVersion, err -} - -func restoreEtcHosts() error { - cmd := exec.Command("mv", - "/etc/hosts.bak", - "/etc/hosts", - ) - return cmd.Run() -} - -func upgradeAgent(ctx context.Context, version string, agentFixture *atesting.Fixture, log func(args ...any)) error { - args := []string{"upgrade", version} - output, err := agentFixture.Exec(ctx, args) - if err != nil { - log("Upgrade command output after error: ", string(output)) - return err - } - - return nil -} - -func TestUpgradeBrokenPackageVersion(t *testing.T) { - define.Require(t, define.Requirements{ - // We require sudo for this test to run - // `elastic-agent install`. - Sudo: true, - - // It's not safe to run this test locally as it - // installs Elastic Agent. 
- Local: false, - }) - - // Get path to Elastic Agent executable - f, err := define.NewFixture(t, define.Version()) - require.NoError(t, err) - - fromVersion, err := version.ParseVersion(define.Version()) - require.NoError(t, err) - - // Prepare the Elastic Agent so the binary is extracted and ready to use. - err = f.Prepare(context.Background()) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - err = f.Configure(ctx, []byte(fastWatcherCfg)) - require.NoError(t, err, "error configuring agent fixture") - - t.Cleanup(func() { - // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 - waitForUpgradeWatcherToComplete(t, f, fromVersion, standaloneWatcherDuration) - }) - - output, err := tools.InstallStandaloneAgent(f) - t.Logf("Agent installation output: %q", string(output)) - require.NoError(t, err) - - c := f.Client() - - require.Eventually(t, func() bool { - err := c.Connect(ctx) - if err != nil { - t.Logf("connecting client to agent: %v", err) - return false - } - defer c.Disconnect() - state, err := c.State(ctx) - if err != nil { - t.Logf("error getting the agent state: %v", err) - return false - } - t.Logf("agent state: %+v", state) - return state.State == v2proto.State_HEALTHY - }, 2*time.Minute, 10*time.Second, "Agent never became healthy") - - // get rid of the package version files in the installed directory - removePackageVersionFiles(t, f) - - // get the version returned by the currently running agent - actualVersionBytes := getAgentVersion(t, f, context.Background(), false) - - actualVersion := unmarshalVersionOutput(t, actualVersionBytes, "daemon") - - // start the upgrade to the latest version - require.NotEmpty(t, actualVersion, "broken agent package version should not be empty") - - // upgrade to latest version whatever that will be - aac := tools.NewArtifactAPIClient() - versionList, err := aac.GetVersions(ctx) - require.NoError(t, 
err) - require.NotEmpty(t, versionList.Versions, "Artifact API returned no versions") - latestVersion := versionList.Versions[len(versionList.Versions)-1] - - t.Logf("Upgrading to version %q", latestVersion) - - err = c.Connect(ctx) - require.NoError(t, err, "error connecting client to agent") - defer c.Disconnect() - - _, err = c.Upgrade(ctx, latestVersion, "", false, false) - require.NoErrorf(t, err, "error triggering agent upgrade to version %q", latestVersion) - parsedLatestVersion, err := version.ParseVersion(latestVersion) - require.NoError(t, err) - - require.Eventuallyf(t, func() bool { - state, err := c.State(ctx) - if err != nil { - t.Logf("error getting the agent state: %v", err) - return false - } - t.Logf("current agent state: %+v", state) - return state.Info.Version == parsedLatestVersion.CoreVersion() && - state.Info.Snapshot == parsedLatestVersion.IsSnapshot() && - state.State == v2proto.State_HEALTHY - }, 5*time.Minute, 10*time.Second, "agent never upgraded to expected version") -} - -func removePackageVersionFiles(t *testing.T, f *atesting.Fixture) { - installFS := os.DirFS(f.WorkDir()) - matches := []string{} - - err := fs.WalkDir(installFS, ".", func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - - if d.Name() == agtversion.PackageVersionFileName { - matches = append(matches, path) - } - return nil - }) - require.NoError(t, err) - - t.Logf("package version files found: %v", matches) - - // the version files should have been removed from the other test, we just make sure - for _, m := range matches { - vFile := filepath.Join(f.WorkDir(), m) - t.Logf("removing package version file %q", vFile) - err = os.Remove(vFile) - require.NoErrorf(t, err, "error removing package version file %q", vFile) - } -} - -// TestStandaloneUpgradeFailsStatus tests the scenario where upgrading to a new version -// of Agent fails due to the new Agent binary reporting an unhealthy status. 
It checks -// that the Agent is rolled back to the previous version. -func TestStandaloneUpgradeFailsStatus(t *testing.T) { - define.Require(t, define.Requirements{ - Local: false, // requires Agent installation - Isolate: false, - Sudo: true, // requires Agent installation - }) - - upgradeFromVersion, err := version.ParseVersion(define.Version()) - require.NoError(t, err) - - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - // Get available versions from Artifacts API - aac := tools.NewArtifactAPIClient() - versionList, err := aac.GetVersions(ctx) - require.NoError(t, err) - require.NotEmpty(t, versionList.Versions, "Artifact API returned no versions") - - // Determine the version that's TWO versions behind the latest. This is necessary for two reasons: - // 1. We don't want to necessarily use the latest version as it might be the same as the - // local one, which will then cause the invalid input in the Agent test policy (defined further - // below in this test) to come into play with the Agent version we're upgrading from, thus preventing - // it from ever becoming healthy. - // 2. We don't want to necessarily use the version that's one before the latest because sometimes we - // are in a situation where the latest version has been advanced to the next release (e.g. 8.10.0) - // but the version before that (e.g. 8.9.0) hasn't been released yet. 
- require.GreaterOrEqual(t, len(versionList.Versions), 3) - upgradeToVersionStr := versionList.Versions[len(versionList.Versions)-3] - - upgradeToVersion, err := version.ParseVersion(upgradeToVersionStr) - require.NoError(t, err) - - t.Logf("Testing Elastic Agent upgrade from %s to %s...", upgradeFromVersion, upgradeToVersion) - - agentFixture, err := define.NewFixture(t, define.Version()) - require.NoError(t, err) - - err = agentFixture.Prepare(ctx) - require.NoError(t, err, "error preparing agent fixture") - - // Configure Agent with fast watcher configuration and also an invalid - // input when the Agent version matches the upgraded Agent version. This way - // the pre-upgrade version of the Agent runs healthy, but the post-upgrade - // version doesn't. - invalidInputPolicy := fastWatcherCfg + fmt.Sprintf(` -outputs: - default: - type: elasticsearch - hosts: [127.0.0.1:9200] - -inputs: - - condition: '${agent.version.version} == "%s"' - type: invalid - id: invalid-input -`, upgradeToVersion.CoreVersion()) - - err = agentFixture.Configure(ctx, []byte(invalidInputPolicy)) - require.NoError(t, err, "error configuring agent fixture") - - t.Log("Install the built Agent") - output, err := tools.InstallStandaloneAgent(agentFixture) - t.Log(string(output)) - require.NoError(t, err) - - c := agentFixture.Client() - require.Eventually(t, func() bool { - return checkAgentHealthAndVersion(t, ctx, agentFixture, upgradeFromVersion.CoreVersion(), upgradeFromVersion.IsSnapshot(), "") - }, 2*time.Minute, 10*time.Second, "Agent never became healthy") - - toVersion := upgradeToVersion.String() - t.Logf("Upgrading Agent to %s", toVersion) - err = c.Connect(ctx) - require.NoError(t, err, "error connecting client to agent") - defer c.Disconnect() - - _, err = c.Upgrade(ctx, toVersion, "", false, false) - require.NoErrorf(t, err, "error triggering agent upgrade to version %q", toVersion) - - require.Eventually(t, func() bool { - return checkAgentHealthAndVersion(t, ctx, agentFixture, 
upgradeToVersion.CoreVersion(), upgradeToVersion.IsSnapshot(), "") - }, 2*time.Minute, 250*time.Millisecond, "Upgraded Agent never became healthy") - - // Wait for upgrade watcher to finish running - waitForUpgradeWatcherToComplete(t, agentFixture, upgradeFromVersion, standaloneWatcherDuration) - - t.Log("Ensure the we have rolled back and the correct version is running") - require.Eventually(t, func() bool { - return checkAgentHealthAndVersion(t, ctx, agentFixture, upgradeFromVersion.CoreVersion(), upgradeFromVersion.IsSnapshot(), "") - }, 2*time.Minute, 10*time.Second, "Rolled back Agent never became healthy") -} - -type CustomPGP struct { - PGP string - PGPUri string - PGPPath string -} diff --git a/testing/integration/upgrade_uninstall_test.go b/testing/integration/upgrade_uninstall_test.go new file mode 100644 index 00000000000..a26a3bb6d1e --- /dev/null +++ b/testing/integration/upgrade_uninstall_test.go @@ -0,0 +1,90 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +//go:build integration + +package integration + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/elastic/elastic-agent/pkg/version" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/testing/define" + "github.com/elastic/elastic-agent/testing/upgradetest" +) + +func TestStandaloneUpgradeUninstallKillWatcher(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + currentVersion, err := version.ParseVersion(define.Version()) + require.NoError(t, err) + if currentVersion.Less(*upgradetest.Version_8_11_0_SNAPSHOT) { + t.Skipf("Version %s is lower than min version %s; test cannot be performed", define.Version(), upgradetest.Version_8_11_0_SNAPSHOT) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Start at old version, we want this test to upgrade to our + // build to ensure that the uninstall will kill the watcher. + startVersion, err := upgradetest.PreviousMinor(ctx, define.Version()) + require.NoError(t, err) + startFixture, err := atesting.NewFixture( + t, + startVersion, + atesting.WithFetcher(atesting.ArtifactFetcher()), + ) + require.NoError(t, err) + + // Upgrades to build under test. + endFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + endVersionInfo, err := endFixture.ExecVersion(ctx) + require.NoError(t, err, "failed to get end agent build version info") + + // Use the post-upgrade hook to bypass the remainder of the PerformUpgrade + // because we want to do our own checks for the rollback. 
+ var ErrPostExit = errors.New("post exit") + postUpgradeHook := func() error { + return ErrPostExit + } + + err = upgradetest.PerformUpgrade( + ctx, startFixture, endFixture, t, + upgradetest.WithPostUpgradeHook(postUpgradeHook)) + if !errors.Is(err, ErrPostExit) { + require.NoError(t, err) + } + + // wait for the agent to be healthy and at the new version + err = upgradetest.WaitHealthyAndVersion(ctx, startFixture, endVersionInfo.Binary, 10*time.Minute, 10*time.Second, t) + if err != nil { + // agent never got healthy, but we need to ensure the watcher is stopped before continuing (this + // prevents this test failure from interfering with another test) + // this kills the watcher instantly and waits for it to be gone before continuing + watcherErr := upgradetest.WaitForNoWatcher(ctx, 1*time.Minute, time.Second, 100*time.Millisecond) + if watcherErr != nil { + t.Logf("failed to kill watcher due to agent not becoming healthy: %s", watcherErr) + } + } + require.NoError(t, err) + + // call uninstall now, do not wait for the watcher to finish running + // 8.11+ should always kill the running watcher (if it doesn't uninstall will fail) + uninstallCtx, uninstallCancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer uninstallCancel() + output, err := startFixture.Uninstall(uninstallCtx, &atesting.UninstallOpts{Force: true}) + assert.NoError(t, err, "uninstall failed with output:\n%s", string(output)) +} diff --git a/testing/upgradetest/logger.go b/testing/upgradetest/logger.go new file mode 100644 index 00000000000..fafd520d490 --- /dev/null +++ b/testing/upgradetest/logger.go @@ -0,0 +1,9 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package upgradetest + +type Logger interface { + Logf(format string, args ...interface{}) +} diff --git a/testing/upgradetest/upgrader.go b/testing/upgradetest/upgrader.go new file mode 100644 index 00000000000..64dcb2b70ac --- /dev/null +++ b/testing/upgradetest/upgrader.go @@ -0,0 +1,367 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package upgradetest + +import ( + "context" + "encoding/json" + "fmt" + "path/filepath" + "time" + + v1client "github.com/elastic/elastic-agent/pkg/control/v1/client" + v2proto "github.com/elastic/elastic-agent/pkg/control/v2/cproto" + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/version" +) + +// CustomPGP allows for custom PGP options on upgrade. +type CustomPGP struct { + PGP string + PGPUri string + PGPPath string +} + +type upgradeOpts struct { + sourceURI *string + + skipVerify bool + skipDefaultPgp bool + customPgp *CustomPGP + + preInstallHook func() error + postInstallHook func() error + preUpgradeHook func() error + postUpgradeHook func() error +} + +type upgradeOpt func(opts *upgradeOpts) + +// WithSourceURI sets a specific --source-uri for the upgrade +// command. This doesn't change the verification of the upgrade +// the resulting upgrade must still be the same agent provided +// in the endFixture variable. +func WithSourceURI(sourceURI string) upgradeOpt { + return func(opts *upgradeOpts) { + opts.sourceURI = &sourceURI + } +} + +// WithSkipVerify sets the skip verify option for upgrade. +func WithSkipVerify(skipVerify bool) upgradeOpt { + return func(opts *upgradeOpts) { + opts.skipVerify = skipVerify + } +} + +// WithSkipDefaultPgp sets the skip default pgp option for upgrade. 
+func WithSkipDefaultPgp(skipDefaultPgp bool) upgradeOpt { + return func(opts *upgradeOpts) { + opts.skipDefaultPgp = skipDefaultPgp + } +} + +// WithCustomPGP sets a custom pgp configuration for upgrade. +func WithCustomPGP(customPgp CustomPGP) upgradeOpt { + return func(opts *upgradeOpts) { + opts.customPgp = &customPgp + } +} + +// WithPreInstallHook sets a hook to be called before install. +func WithPreInstallHook(hook func() error) upgradeOpt { + return func(opts *upgradeOpts) { + opts.preInstallHook = hook + } +} + +// WithPostInstallHook sets a hook to be called before install. +func WithPostInstallHook(hook func() error) upgradeOpt { + return func(opts *upgradeOpts) { + opts.postInstallHook = hook + } +} + +// WithPreUpgradeHook sets a hook to be called before install. +func WithPreUpgradeHook(hook func() error) upgradeOpt { + return func(opts *upgradeOpts) { + opts.preUpgradeHook = hook + } +} + +// WithPostUpgradeHook sets a hook to be called before install. +func WithPostUpgradeHook(hook func() error) upgradeOpt { + return func(opts *upgradeOpts) { + opts.postUpgradeHook = hook + } +} + +// PerformUpgrade performs the upgrading of the Elastic Agent. +func PerformUpgrade( + ctx context.Context, + startFixture *atesting.Fixture, + endFixture *atesting.Fixture, + logger Logger, + opts ...upgradeOpt, +) error { + // use the passed in options to perform the upgrade + // `skipVerify` is by default enabled, because default is to perform a local + // upgrade to a built version of the Elastic Agent. 
+ var upgradeOpts upgradeOpts + upgradeOpts.skipVerify = true + for _, o := range opts { + o(&upgradeOpts) + } + + // ensure that both the starting and ending fixtures are prepared + err := startFixture.EnsurePrepared(ctx) + if err != nil { + return fmt.Errorf("failed to prepare the startFixture: %w", err) + } + err = endFixture.EnsurePrepared(ctx) + if err != nil { + return fmt.Errorf("failed to prepare the endFixture: %w", err) + } + + // start fixture gets the agent configured to use a faster watcher + err = ConfigureFastWatcher(ctx, startFixture) + if err != nil { + return fmt.Errorf("failed configuring the start agent with faster watcher configuration: %w", err) + } + + // get the versions from each fixture (that ensures that it's always the + // same version that the fixture is working with) + startVersionInfo, err := startFixture.ExecVersion(ctx) + if err != nil { + return fmt.Errorf("failed to get start agent build version info: %w", err) + } + startParsedVersion, err := version.ParseVersion(startVersionInfo.Binary.String()) + if err != nil { + return fmt.Errorf("failed to get parsed start agent build version (%s): %w", startVersionInfo.Binary.String(), err) + } + endVersionInfo, err := endFixture.ExecVersion(ctx) + if err != nil { + return fmt.Errorf("failed to get end agent build version info: %w", err) + } + + if upgradeOpts.preInstallHook != nil { + if err := upgradeOpts.preInstallHook(); err != nil { + return fmt.Errorf("pre install hook failed: %w", err) + } + } + + logger.Logf("Installing version %q", startParsedVersion.VersionWithPrerelease()) + + // install the start agent + var nonInteractiveFlag bool + if Version_8_2_0.Less(*startParsedVersion) { + nonInteractiveFlag = true + } + installOpts := atesting.InstallOpts{ + NonInteractive: nonInteractiveFlag, + Force: true, + } + output, err := startFixture.Install(ctx, &installOpts) + if err != nil { + return fmt.Errorf("failed to install start agent (err: %w) [output: %s]", err, string(output)) + } + 
+ if upgradeOpts.postInstallHook != nil { + if err := upgradeOpts.postInstallHook(); err != nil { + return fmt.Errorf("post install hook failed: %w", err) + } + } + + // wait for the agent to be healthy and correct version + err = WaitHealthyAndVersion(ctx, startFixture, startVersionInfo.Binary, 2*time.Minute, 10*time.Second, logger) + if err != nil { + // context added by WaitHealthyAndVersion + return err + } + + if upgradeOpts.preUpgradeHook != nil { + if err := upgradeOpts.preUpgradeHook(); err != nil { + return fmt.Errorf("pre upgrade hook failed: %w", err) + } + } + + logger.Logf("Upgrading from version %q to version %q", startParsedVersion, endVersionInfo.Binary.String()) + + upgradeCmdArgs := []string{"upgrade", endVersionInfo.Binary.String()} + if upgradeOpts.sourceURI == nil { + // no --source-uri set so it comes from the endFixture + srcPkg, err := endFixture.SrcPackage(ctx) + if err != nil { + return fmt.Errorf("failed to get end agent source package path: %w", err) + } + sourceURI := "file://" + filepath.Dir(srcPkg) + upgradeCmdArgs = append(upgradeCmdArgs, "--source-uri", sourceURI) + } else if *upgradeOpts.sourceURI != "" { + // specific ---source-uri + upgradeCmdArgs = append(upgradeCmdArgs, "--source-uri", *upgradeOpts.sourceURI) + } + + if upgradeOpts.customPgp != nil { + if len(upgradeOpts.customPgp.PGP) > 0 { + upgradeCmdArgs = append(upgradeCmdArgs, "--pgp", upgradeOpts.customPgp.PGP) + } + + if len(upgradeOpts.customPgp.PGPUri) > 0 { + upgradeCmdArgs = append(upgradeCmdArgs, "--pgp-uri", upgradeOpts.customPgp.PGPUri) + } + + if len(upgradeOpts.customPgp.PGPPath) > 0 { + upgradeCmdArgs = append(upgradeCmdArgs, "--pgp-path", upgradeOpts.customPgp.PGPPath) + } + } + + if upgradeOpts.skipVerify { + upgradeCmdArgs = append(upgradeCmdArgs, "--skip-verify") + } + + if upgradeOpts.skipDefaultPgp && !startParsedVersion.Less(*Version_8_10_0_SNAPSHOT) { + upgradeCmdArgs = append(upgradeCmdArgs, "--skip-default-pgp") + } + + upgradeOutput, err := 
startFixture.Exec(ctx, upgradeCmdArgs) + if err != nil { + return fmt.Errorf("failed to start agent upgrade to version %q: %w\n%s", endVersionInfo.Binary.Version, err, upgradeOutput) + } + + // wait for the watcher to show up + logger.Logf("waiting for upgrade watcher to start") + err = WaitForWatcher(ctx, 2*time.Minute, 10*time.Second) + if err != nil { + return fmt.Errorf("failed to find watcher: %w", err) + } + logger.Logf("upgrade watcher started") + + if upgradeOpts.postUpgradeHook != nil { + if err := upgradeOpts.postUpgradeHook(); err != nil { + return fmt.Errorf("post upgrade hook failed: %w", err) + } + } + + // wait for the agent to be healthy and correct version + err = WaitHealthyAndVersion(ctx, startFixture, endVersionInfo.Binary, 2*time.Minute, 10*time.Second, logger) + if err != nil { + // agent never got healthy, but we need to ensure the watcher is stopped before continuing + // this kills the watcher instantly and waits for it to be gone before continuing + watcherErr := WaitForNoWatcher(ctx, 1*time.Minute, time.Second, 100*time.Millisecond) + if watcherErr != nil { + logger.Logf("failed to kill watcher due to agent not becoming healthy: %s", watcherErr) + } + + // error context added by WaitHealthyAndVersion + return err + } + + // it is unstable to continue until the watcher is done + // the maximum wait time is 1 minutes (2 minutes for grace) some older versions + // do not respect the `ConfigureFastWatcher` so we have to kill the watcher after the + // 1 minute window (1 min 15 seconds for grace) has passed. 
+ logger.Logf("waiting for upgrade watcher to finish") + err = WaitForNoWatcher(ctx, 2*time.Minute, 10*time.Second, 1*time.Minute+15*time.Second) + if err != nil { + return fmt.Errorf("watcher never stopped running: %w", err) + } + logger.Logf("upgrade watcher finished") + + // now that the watcher has stopped lets ensure that it's still the expected + // version, otherwise it's possible that it was rolled back to the original version + err = CheckHealthyAndVersion(ctx, startFixture, endVersionInfo.Binary) + if err != nil { + // error context added by CheckHealthyAndVersion + return err + } + return nil +} + +func CheckHealthyAndVersion(ctx context.Context, f *atesting.Fixture, versionInfo atesting.AgentBinaryVersion) error { + checkFunc := func() error { + status, err := f.ExecStatus(ctx) + if err != nil { + return err + } + if status.Info.Version != versionInfo.Version { + return fmt.Errorf("versions don't match: %s != %s", status.Info.Version, versionInfo.Version) + } + if status.Info.Snapshot != versionInfo.Snapshot { + return fmt.Errorf("snapshots don't match: %t != %t", status.Info.Snapshot, versionInfo.Snapshot) + } + if status.Info.Commit != versionInfo.Commit { + return fmt.Errorf("commits don't match: %s != %s", status.Info.Commit, versionInfo.Commit) + } + if status.State != int(v2proto.State_HEALTHY) { + return fmt.Errorf("agent state is not healthy: got %d", status.State) + } + return nil + } + + parsedVersion, err := version.ParseVersion(versionInfo.Version) + if err != nil { + return fmt.Errorf("failed to get parsed version (%s): %w", versionInfo.Version, err) + } + if parsedVersion.Less(*Version_8_6_0) { + // we have to handle v1 architecture of the Elastic Agent + checkFunc = func() error { + stateOut, err := f.Exec(ctx, []string{"status", "--output", "json"}) + if err != nil { + return err + } + var state v1client.AgentStatus + err = json.Unmarshal(stateOut, &state) + if err != nil { + return err + } + versionOut, err := f.ExecVersion(ctx) + if 
err != nil { + return err + } + + if versionOut.Binary.Version != versionInfo.Version { + return fmt.Errorf("versions don't match: %s != %s", versionOut.Binary.Version, versionInfo.Version) + } + if versionOut.Binary.Snapshot != versionInfo.Snapshot { + return fmt.Errorf("snapshots don't match: %t != %t", versionOut.Binary.Snapshot, versionInfo.Snapshot) + } + if versionOut.Binary.Commit != versionInfo.Commit { + return fmt.Errorf("commits don't match: %s != %s", versionOut.Binary.Commit, versionInfo.Commit) + } + if state.Status != v1client.Healthy { + return fmt.Errorf("agent state is not healthy: got %d", state.Status) + } + return nil + } + } + + return checkFunc() +} + +func WaitHealthyAndVersion(ctx context.Context, f *atesting.Fixture, versionInfo atesting.AgentBinaryVersion, timeout time.Duration, interval time.Duration, logger Logger) error { + ctx, cancel := context.WithTimeout(ctx, timeout) + defer cancel() + + t := time.NewTicker(interval) + defer t.Stop() + + var lastErr error + for { + select { + case <-ctx.Done(): + if lastErr != nil { + return fmt.Errorf("failed waiting for healthy agent and version (%w): %w", ctx.Err(), lastErr) + } + return ctx.Err() + case <-t.C: + err := CheckHealthyAndVersion(ctx, f, versionInfo) + if err == nil { + return nil + } + lastErr = err + logger.Logf("waiting for healthy agent and proper version: %s", err) + } + } +} diff --git a/testing/upgradetest/versions.go b/testing/upgradetest/versions.go new file mode 100644 index 00000000000..a1f046bb6aa --- /dev/null +++ b/testing/upgradetest/versions.go @@ -0,0 +1,114 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. 
+ +package upgradetest + +import ( + "context" + "errors" + "fmt" + "sort" + + "github.com/elastic/elastic-agent/pkg/testing/tools" + "github.com/elastic/elastic-agent/pkg/version" +) + +var ( + // Version_8_2_0 is the first version to include --non-interactive flag during install + Version_8_2_0 = version.NewParsedSemVer(8, 2, 0, "", "") + // Version_8_6_0 is the first version to use agent v2 protocol + Version_8_6_0 = version.NewParsedSemVer(8, 6, 0, "", "") + // Version_8_7_0 is the minimum version for passing --skip-verify when upgrading + Version_8_7_0 = version.NewParsedSemVer(8, 7, 0, "", "") + // Version_8_9_0_SNAPSHOT is the minimum version for upgrade to specific snapshot + minimum version + // for setting shorter watch period after upgrade + Version_8_9_0_SNAPSHOT = version.NewParsedSemVer(8, 9, 0, "SNAPSHOT", "") + // Version_8_10_0_SNAPSHOT is the minimum version for upgrade with remote pgp and skipping + // default pgp verification + Version_8_10_0_SNAPSHOT = version.NewParsedSemVer(8, 10, 0, "SNAPSHOT", "") + // Version_8_11_0_SNAPSHOT is the minimum version for uninstall command to kill the watcher upon uninstall + Version_8_11_0_SNAPSHOT = version.NewParsedSemVer(8, 11, 0, "SNAPSHOT", "") +) + +// GetUpgradableVersions returns the version that the upgradeToVersion can upgrade from. 
+func GetUpgradableVersions(ctx context.Context, upgradeToVersion string, currentMajorVersions int, previousMajorVersions int) ([]*version.ParsedSemVer, error) { + aac := tools.NewArtifactAPIClient() + vList, err := aac.GetVersions(ctx) + if err != nil { + return nil, fmt.Errorf("error retrieving versions from Artifact API: %w", err) + } + if len(vList.Versions) == 0 { + return nil, errors.New("retrieved versions list from Artifact API is empty") + } + + parsedUpgradeToVersion, err := version.ParseVersion(upgradeToVersion) + if err != nil { + return nil, fmt.Errorf("upgradeToVersion %q is not a valid version string: %w", upgradeToVersion, err) + } + currentMajor := parsedUpgradeToVersion.Major() + var currentMajorSelected, previousMajorSelected int + + sortedParsedVersions := make(version.SortableParsedVersions, 0, len(vList.Versions)) + for _, v := range vList.Versions { + pv, err := version.ParseVersion(v) + if err != nil { + return nil, fmt.Errorf("invalid version %q retrieved from artifact API: %w", v, err) + } + sortedParsedVersions = append(sortedParsedVersions, pv) + } + + if len(sortedParsedVersions) == 0 { + return nil, errors.New("parsed versions list is empty") + } + + // normally the output of the versions returned by artifact API is already sorted in ascending order, + // we want to sort in descending orders, so we sort them + sort.Sort(sort.Reverse(sortedParsedVersions)) + + var upgradableVersions []*version.ParsedSemVer + for _, parsedVersion := range sortedParsedVersions { + if currentMajorSelected == currentMajorVersions && previousMajorSelected == previousMajorVersions { + // we got all the versions we need, break the loop + break + } + + if !parsedVersion.Less(*parsedUpgradeToVersion) { + // skip as testing version is less than version to upgrade from + continue + } + + if parsedVersion.IsSnapshot() { + // skip all snapshots + continue + } + + if parsedVersion.Major() == currentMajor && currentMajorSelected < currentMajorVersions { + 
upgradableVersions = append(upgradableVersions, parsedVersion) + currentMajorSelected++ + continue + } + + if parsedVersion.Major() < currentMajor && previousMajorSelected < previousMajorVersions { + upgradableVersions = append(upgradableVersions, parsedVersion) + previousMajorSelected++ + continue + } + + } + return upgradableVersions, nil +} + +// PreviousMinor gets the previous minor version of the provided version. +// +// This checks with the artifact API to ensure to only return version that have actual builds. +func PreviousMinor(ctx context.Context, version string) (string, error) { + versions, err := GetUpgradableVersions(ctx, version, 1, 0) + if err != nil { + return "", err + } + if len(version) == 0 { + return "", fmt.Errorf("no previous minor") + } + return versions[0].String(), nil +} diff --git a/testing/upgradetest/watcher.go b/testing/upgradetest/watcher.go new file mode 100644 index 00000000000..d0505f5992d --- /dev/null +++ b/testing/upgradetest/watcher.go @@ -0,0 +1,110 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package upgradetest + +import ( + "context" + "fmt" + "os" + "time" + + atesting "github.com/elastic/elastic-agent/pkg/testing" + "github.com/elastic/elastic-agent/pkg/utils" +) + +// FastWatcherCfg is configuration that makes the watcher run faster. +const FastWatcherCfg = ` +agent.upgradetest.watcher: + grace_period: 1m + error_check.interval: 15s + crash_check.interval: 15s +` + +// ConfigureFastWatcher writes an Elastic Agent configuration that +// adjusts the watcher to be faster. +// +// Note: Not all versions or modes of the Elastic Agent respect this option +// that is why the `WaitForNoWatcher` should also be used to ensure that the +// watcher stops or is killed before continuing. 
func ConfigureFastWatcher(ctx context.Context, f *atesting.Fixture) error {
	return f.Configure(ctx, []byte(FastWatcherCfg))
}

// WaitForWatcher loops until a watcher is found running or times out.
func WaitForWatcher(ctx context.Context, timeout time.Duration, interval time.Duration) error {
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	// poll for watcher processes every interval until the timeout expires
	t := time.NewTicker(interval)
	defer t.Stop()

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-t.C:
			pids, err := utils.GetWatcherPIDs()
			if err != nil {
				return fmt.Errorf("failed to fetch watcher pids: %w", err)
			}
			if len(pids) > 0 {
				// at least one watcher process is running
				return nil
			}
		}
	}
}

// WaitForNoWatcher loops until no watcher is found running, times out, or
// until the killTimeout is reached.
//
// killTimeout is needed because tests can upgrade to older versions to test
// features in the current build, but that means that when uninstall occurs
// that fixes for the watcher are not present. This ensures that even on an
// installed old build that the watcher is stopped, before uninstall is performed.
func WaitForNoWatcher(ctx context.Context, timeout time.Duration, interval time.Duration, killTimeout time.Duration) error {
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	t := time.NewTicker(interval)
	defer t.Stop()

	// one-shot timer: after killTimeout we stop waiting politely and kill
	tk := time.NewTimer(killTimeout)
	defer tk.Stop()

	for {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-t.C:
			pids, err := utils.GetWatcherPIDs()
			if err != nil {
				return fmt.Errorf("failed to fetch watcher pids: %w", err)
			}
			if len(pids) == 0 {
				return nil
			}
		case <-tk.C:
			pids, err := utils.GetWatcherPIDs()
			if err != nil {
				return fmt.Errorf("failed to fetch watcher pids: %w", err)
			}
			if len(pids) == 0 {
				// all good; no watcher anyway
				return nil
			}
			// a watcher is still running after killTimeout; it should have
			// completed within killTimeout, so the running Elastic Agent
			// version presumably does not respect `ConfigureFastWatcher`.
			// Kill the remaining watcher processes directly.
			for _, pid := range pids {
				proc, err := os.FindProcess(pid)
				if err == nil {
					_ = proc.Kill()
				}
			}
			// next interval ticker will check for no watcher and exit
		}
	}
}