From bcb10189d6273ff7b4a06b92dcb338cd522f2870 Mon Sep 17 00:00:00 2001 From: Michal Pristas Date: Thu, 21 Sep 2023 01:36:41 +0200 Subject: [PATCH 1/3] [Bug] Do not fail when remote PGPs are not available (#3427) * do not fail when remote PGPs are not available * IT * changelog * lint * https * Unit test * make sure default scenario does not yield remote unavailable * comments applied * code dedup * changelog fragment type * moved check one level up * Update internal/pkg/agent/application/upgrade/artifact/download/verifier.go Co-authored-by: Craig MacKenzie --------- Co-authored-by: Craig MacKenzie (cherry picked from commit b1d2e6b04062b8572718e583590afe678579bf9d) # Conflicts: # testing/integration/upgrade_test.go --- ...ent-handling-of-air-gapped-PGP-checks.yaml | 31 +++ .../upgrade/artifact/download/fs/verifier.go | 3 +- .../artifact/download/fs/verifier_test.go | 92 +++++---- .../artifact/download/http/verifier.go | 3 +- .../upgrade/artifact/download/verifier.go | 26 ++- testing/integration/upgrade_test.go | 184 +++++++++++++++++- 6 files changed, 285 insertions(+), 54 deletions(-) create mode 100644 changelog/fragments/1695035111-Resilient-handling-of-air-gapped-PGP-checks.yaml diff --git a/changelog/fragments/1695035111-Resilient-handling-of-air-gapped-PGP-checks.yaml b/changelog/fragments/1695035111-Resilient-handling-of-air-gapped-PGP-checks.yaml new file mode 100644 index 00000000000..caaa8a2f53a --- /dev/null +++ b/changelog/fragments/1695035111-Resilient-handling-of-air-gapped-PGP-checks.yaml @@ -0,0 +1,31 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: bug-fix + +# Change summary; a 80ish characters long description of the change. +summary: Resilient handling of air gapped PGP checks + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +description: Elastic Agent should not fail when remote PGP is specified (or official Elastic fallback PGP used) and remote is not available + +# Affected component; a word indicating the component this changeset affects. +component: elastic-agent + +# PR number; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +pr: 3427 + +# Issue number; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +issue: 3368 diff --git a/internal/pkg/agent/application/upgrade/artifact/download/fs/verifier.go b/internal/pkg/agent/application/upgrade/artifact/download/fs/verifier.go index e75d25dbb7e..72066129222 100644 --- a/internal/pkg/agent/application/upgrade/artifact/download/fs/verifier.go +++ b/internal/pkg/agent/application/upgrade/artifact/download/fs/verifier.go @@ -124,10 +124,11 @@ func (v *Verifier) verifyAsc(fullPath string, skipDefaultPgp bool, pgpSources .. if len(check) == 0 { continue } - raw, err := download.PgpBytesFromSource(check, v.client) + raw, err := download.PgpBytesFromSource(v.log, check, v.client) if err != nil { return err } + if len(raw) == 0 { continue } diff --git a/internal/pkg/agent/application/upgrade/artifact/download/fs/verifier_test.go b/internal/pkg/agent/application/upgrade/artifact/download/fs/verifier_test.go index 603d997e450..5012e8244dd 100644 --- a/internal/pkg/agent/application/upgrade/artifact/download/fs/verifier_test.go +++ b/internal/pkg/agent/application/upgrade/artifact/download/fs/verifier_test.go @@ -173,50 +173,60 @@ func prepareFetchVerifyTests(dropPath, targetDir, targetFilePath, hashTargetFile } func TestVerify(t *testing.T) { - log, _ := logger.New("", false) - targetDir, err := ioutil.TempDir(os.TempDir(), "") - if err != nil { - t.Fatal(err) + tt := []struct { + Name string + RemotePGPUris []string + UnreachableCount int + }{ + {"default", nil, 0}, + {"unreachable local path", []string{download.PgpSourceURIPrefix + "https://127.0.0.1:2874/path/does/not/exist"}, 1}, } - timeout := 30 * time.Second - - config := &artifact.Config{ - TargetDirectory: targetDir, - DropPath: filepath.Join(targetDir, "drop"), - OperatingSystem: "linux", - Architecture: "32", - HTTPTransportSettings: httpcommon.HTTPTransportSettings{ - Timeout: timeout, - }, + for _, tc := range tt { + t.Run(tc.Name, func(t *testing.T) { + log, obs := logger.NewTesting("TestVerify") + targetDir, err := ioutil.TempDir(os.TempDir(), "") + require.NoError(t, err) + + timeout := 30 * time.Second + + config := &artifact.Config{ + TargetDirectory: targetDir, + DropPath: filepath.Join(targetDir, "drop"), + OperatingSystem: "linux", + Architecture: "32", + HTTPTransportSettings: httpcommon.HTTPTransportSettings{ + Timeout: timeout, + }, + } + + err = prepareTestCase(beatSpec, version, config) + require.NoError(t, err) + + testClient := NewDownloader(config) + artifact, err := testClient.Download(context.Background(), beatSpec, version) + require.NoError(t, err) + + t.Cleanup(func() { + os.Remove(artifact) + os.Remove(artifact + ".sha512") + os.RemoveAll(config.DropPath) + }) + + _, err = os.Stat(artifact) + require.NoError(t, err) + + testVerifier, err := NewVerifier(log, config, true, nil) + require.NoError(t, err) + + err = testVerifier.Verify(beatSpec, version, false, tc.RemotePGPUris...) + require.NoError(t, err) + + // log message informing remote PGP was skipped + logs := obs.FilterMessageSnippet("Skipped remote PGP located at") + require.Equal(t, tc.UnreachableCount, logs.Len()) + }) } - - if err := prepareTestCase(beatSpec, version, config); err != nil { - t.Fatal(err) - } - - testClient := NewDownloader(config) - artifact, err := testClient.Download(context.Background(), beatSpec, version) - if err != nil { - t.Fatal(err) - } - - _, err = os.Stat(artifact) - if err != nil { - t.Fatal(err) - } - - testVerifier, err := NewVerifier(log, config, true, nil) - if err != nil { - t.Fatal(err) - } - - err = testVerifier.Verify(beatSpec, version, false) - require.NoError(t, err) - - os.Remove(artifact) - os.Remove(artifact + ".sha512") - os.RemoveAll(config.DropPath) } func prepareTestCase(a artifact.Artifact, version string, cfg *artifact.Config) error { diff --git a/internal/pkg/agent/application/upgrade/artifact/download/http/verifier.go b/internal/pkg/agent/application/upgrade/artifact/download/http/verifier.go index e708fe79e91..61992e08816 100644 --- a/internal/pkg/agent/application/upgrade/artifact/download/http/verifier.go +++ b/internal/pkg/agent/application/upgrade/artifact/download/http/verifier.go @@ -125,10 +125,11 @@ func (v *Verifier) verifyAsc(a artifact.Artifact, version string, skipDefaultPgp if len(check) == 0 { continue } - raw, err := download.PgpBytesFromSource(check, v.client) + raw, err := download.PgpBytesFromSource(v.log, check, v.client) if err != nil { return err } + if len(raw) == 0 { continue } diff --git a/internal/pkg/agent/application/upgrade/artifact/download/verifier.go b/internal/pkg/agent/application/upgrade/artifact/download/verifier.go index 36e0197a0f1..2c716cb2844 100644 --- a/internal/pkg/agent/application/upgrade/artifact/download/verifier.go +++ b/internal/pkg/agent/application/upgrade/artifact/download/verifier.go @@ -20,10 +20,11 @@ import ( "strings" "time" - "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact" + "github.com/hashicorp/go-multierror" "golang.org/x/crypto/openpgp" //nolint:staticcheck // crypto/openpgp is only receiving security updates. + "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact" "github.com/elastic/elastic-agent/internal/pkg/agent/errors" ) @@ -32,6 +33,17 @@ const ( PgpSourceURIPrefix = "pgp_uri:" ) +var ( + ErrRemotePGPDownloadFailed = errors.New("Remote PGP download failed") + ErrInvalidLocation = errors.New("Remote PGP location is invalid") +) + +// warnLogger is a logger that only needs to implement Warnf, as that is the only functions +// that the downloadProgressReporter uses. +type warnLogger interface { + Warnf(format string, args ...interface{}) +} + // ChecksumMismatchError indicates the expected checksum for a file does not // match the computed checksum. type ChecksumMismatchError struct { @@ -168,13 +180,17 @@ func VerifyGPGSignature(file string, asciiArmorSignature, publicKey []byte) erro return nil } -func PgpBytesFromSource(source string, client http.Client) ([]byte, error) { +func PgpBytesFromSource(log warnLogger, source string, client http.Client) ([]byte, error) { if strings.HasPrefix(source, PgpSourceRawPrefix) { return []byte(strings.TrimPrefix(source, PgpSourceRawPrefix)), nil } if strings.HasPrefix(source, PgpSourceURIPrefix) { - return fetchPgpFromURI(strings.TrimPrefix(source, PgpSourceURIPrefix), client) + pgpBytes, err := fetchPgpFromURI(strings.TrimPrefix(source, PgpSourceURIPrefix), client) + if errors.Is(err, ErrRemotePGPDownloadFailed) || errors.Is(err, ErrInvalidLocation) { + log.Warnf("Skipped remote PGP located at %q because it's unavailable: %v", strings.TrimPrefix(source, PgpSourceURIPrefix), err) + } + return pgpBytes, nil } return nil, errors.New("unknown pgp source") @@ -187,7 +203,7 @@ func CheckValidDownloadUri(rawURI string) error { } if !strings.EqualFold(uri.Scheme, "https") { - return fmt.Errorf("failed to check URI %q: HTTPS is required", rawURI) + return multierror.Append(fmt.Errorf("failed to check URI %q: HTTPS is required", rawURI), ErrInvalidLocation) } return nil @@ -207,7 +223,7 @@ func fetchPgpFromURI(uri string, client http.Client) ([]byte, error) { } resp, err := http.DefaultClient.Do(req) if err != nil { - return nil, err + return nil, multierror.Append(err, ErrRemotePGPDownloadFailed) } defer resp.Body.Close() diff --git a/testing/integration/upgrade_test.go b/testing/integration/upgrade_test.go index 8bf392c7f03..2bb94ae277d 100644 --- a/testing/integration/upgrade_test.go +++ b/testing/integration/upgrade_test.go @@ -222,7 +222,7 @@ func TestStandaloneUpgrade(t *testing.T) { parsedUpgradeVersion, err := version.ParseVersion(define.Version()) require.NoErrorf(t, err, "define.Version() %q cannot be parsed as agent version", define.Version()) skipVerify := version_8_7_0.Less(*parsedVersion) - testStandaloneUpgrade(ctx, t, agentFixture, parsedVersion, parsedUpgradeVersion, "", skipVerify, true, false, "") + testStandaloneUpgrade(ctx, t, agentFixture, parsedVersion, parsedUpgradeVersion, "", skipVerify, true, false, CustomPGP{}) }) } } @@ -268,13 +268,71 @@ func TestStandaloneUpgradeWithGPGFallback(t *testing.T) { _, defaultPGP := release.PGP() firstSeven := string(defaultPGP[:7]) - customPGP := strings.Replace( + newPgp := strings.Replace( string(defaultPGP), firstSeven, "abcDEFg", 1, ) + customPGP := CustomPGP{ + PGP: newPgp, + } + + testStandaloneUpgrade(ctx, t, agentFixture, fromVersion, toVersion, "", false, false, true, customPGP) +} + +func TestStandaloneUpgradeWithGPGFallbackOneRemoteFailing(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Sudo: true, // requires Agent installation + }) + + minVersion := version_8_10_0_SNAPSHOT + fromVersion, err := version.ParseVersion(define.Version()) + require.NoError(t, err) + + if fromVersion.Less(*minVersion) { + t.Skipf("Version %s is lower than min version %s", define.Version(), minVersion) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // previous + toVersion, err := fromVersion.GetPreviousMinor() + require.NoError(t, err, "failed to get previous minor") + agentFixture, err := define.NewFixture( + t, + define.Version(), + ) + require.NoError(t, err, "error creating fixture") + + err = agentFixture.Prepare(ctx) + require.NoError(t, err, "error preparing agent fixture") + + err = agentFixture.Configure(ctx, []byte(fastWatcherCfg)) + require.NoError(t, err, "error configuring agent fixture") + + t.Cleanup(func() { + // The watcher needs to finish before the agent is uninstalled: https://github.com/elastic/elastic-agent/issues/3371 + waitForUpgradeWatcherToComplete(t, agentFixture, fromVersion, standaloneWatcherDuration) + }) + + _, defaultPGP := release.PGP() + firstSeven := string(defaultPGP[:7]) + newPgp := strings.Replace( + string(defaultPGP), + firstSeven, + "abcDEFg", + 1, + ) + + customPGP := CustomPGP{ + PGP: newPgp, + PGPUri: "https://127.0.0.1:3456/non/existing/path", + } + testStandaloneUpgrade(ctx, t, agentFixture, fromVersion, toVersion, "", false, false, true, customPGP) } @@ -356,7 +414,7 @@ func TestStandaloneUpgradeToSpecificSnapshotBuild(t *testing.T) { }) require.NoErrorf(t, err, "define.Version() %q cannot be parsed as agent version", define.Version()) - testStandaloneUpgrade(ctx, t, agentFixture, parsedFromVersion, upgradeInputVersion, expectedAgentHashAfterUpgrade, false, true, false, "") + testStandaloneUpgrade(ctx, t, agentFixture, parsedFromVersion, upgradeInputVersion, expectedAgentHashAfterUpgrade, false, true, false, CustomPGP{}) } func getUpgradableVersions(ctx context.Context, t *testing.T, upgradeToVersion string) (upgradableVersions []*version.ParsedSemVer) { @@ -431,7 +489,7 @@ func testStandaloneUpgrade( allowLocalPackage bool, skipVerify bool, skipDefaultPgp bool, - customPgp string, + customPgp CustomPGP, ) { var nonInteractiveFlag bool @@ -483,8 +541,16 @@ func testStandaloneUpgrade( upgradeCmdArgs = append(upgradeCmdArgs, "--skip-default-pgp") } - if len(customPgp) > 0 { - upgradeCmdArgs = append(upgradeCmdArgs, "--pgp", customPgp) + if len(customPgp.PGP) > 0 { + upgradeCmdArgs = append(upgradeCmdArgs, "--pgp", customPgp.PGP) + } + + if len(customPgp.PGPUri) > 0 { + upgradeCmdArgs = append(upgradeCmdArgs, "--pgp-uri", customPgp.PGPUri) + } + + if len(customPgp.PGPPath) > 0 { + upgradeCmdArgs = append(upgradeCmdArgs, "--pgp-path", customPgp.PGPPath) } upgradeTriggerOutput, err := f.Exec(ctx, upgradeCmdArgs) @@ -923,3 +989,109 @@ func removePackageVersionFiles(t *testing.T, f *atesting.Fixture) { require.NoErrorf(t, err, "error removing package version file %q", vFile) } } +<<<<<<< HEAD +======= + +// TestStandaloneUpgradeFailsStatus tests the scenario where upgrading to a new version +// of Agent fails due to the new Agent binary reporting an unhealthy status. It checks +// that the Agent is rolled back to the previous version. +func TestStandaloneUpgradeFailsStatus(t *testing.T) { + define.Require(t, define.Requirements{ + Local: false, // requires Agent installation + Isolate: false, + Sudo: true, // requires Agent installation + }) + + t.Skip("Affected by https://github.com/elastic/elastic-agent/issues/3371, watcher left running at end of test") + + upgradeFromVersion, err := version.ParseVersion(define.Version()) + require.NoError(t, err) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Get available versions from Artifacts API + aac := tools.NewArtifactAPIClient() + versionList, err := aac.GetVersions(ctx) + require.NoError(t, err) + require.NotEmpty(t, versionList.Versions, "Artifact API returned no versions") + + // Determine the version that's TWO versions behind the latest. This is necessary for two reasons: + // 1. We don't want to necessarily use the latest version as it might be the same as the + // local one, which will then cause the invalid input in the Agent test policy (defined further + // below in this test) to come into play with the Agent version we're upgrading from, thus preventing + // it from ever becoming healthy. + // 2. We don't want to necessarily use the version that's one before the latest because sometimes we + // are in a situation where the latest version has been advanced to the next release (e.g. 8.10.0) + // but the version before that (e.g. 8.9.0) hasn't been released yet. + require.GreaterOrEqual(t, len(versionList.Versions), 3) + upgradeToVersionStr := versionList.Versions[len(versionList.Versions)-3] + + upgradeToVersion, err := version.ParseVersion(upgradeToVersionStr) + require.NoError(t, err) + + t.Logf("Testing Elastic Agent upgrade from %s to %s...", upgradeFromVersion, upgradeToVersion) + + agentFixture, err := define.NewFixture(t, define.Version()) + require.NoError(t, err) + + err = agentFixture.Prepare(ctx) + require.NoError(t, err, "error preparing agent fixture") + + // Configure Agent with fast watcher configuration and also an invalid + // input when the Agent version matches the upgraded Agent version. This way + // the pre-upgrade version of the Agent runs healthy, but the post-upgrade + // version doesn't. + invalidInputPolicy := fastWatcherCfg + fmt.Sprintf(` +outputs: + default: + type: elasticsearch + hosts: [127.0.0.1:9200] + +inputs: + - condition: '${agent.version.version} == "%s"' + type: invalid + id: invalid-input +`, upgradeToVersion.CoreVersion()) + + err = agentFixture.Configure(ctx, []byte(invalidInputPolicy)) + require.NoError(t, err, "error configuring agent fixture") + + t.Log("Install the built Agent") + output, err := tools.InstallStandaloneAgent(agentFixture) + t.Log(string(output)) + require.NoError(t, err) + + c := agentFixture.Client() + require.Eventually(t, func() bool { + return checkAgentHealthAndVersion(t, ctx, agentFixture, upgradeFromVersion.CoreVersion(), upgradeFromVersion.IsSnapshot(), "") + }, 2*time.Minute, 10*time.Second, "Agent never became healthy") + + toVersion := upgradeToVersion.String() + t.Logf("Upgrading Agent to %s", toVersion) + err = c.Connect(ctx) + require.NoError(t, err, "error connecting client to agent") + defer c.Disconnect() + + _, err = c.Upgrade(ctx, toVersion, "", false, false) + require.NoErrorf(t, err, "error triggering agent upgrade to version %q", toVersion) + + require.Eventually(t, func() bool { + return checkAgentHealthAndVersion(t, ctx, agentFixture, upgradeToVersion.CoreVersion(), upgradeToVersion.IsSnapshot(), "") + }, 2*time.Minute, 250*time.Millisecond, "Upgraded Agent never became healthy") + + // Wait for upgrade watcher to finish running + waitForUpgradeWatcherToComplete(t, agentFixture, upgradeFromVersion, standaloneWatcherDuration) + + t.Log("Ensure the we have rolled back and the correct version is running") + require.Eventually(t, func() bool { + return checkAgentHealthAndVersion(t, ctx, agentFixture, upgradeFromVersion.CoreVersion(), upgradeFromVersion.IsSnapshot(), "") + }, 2*time.Minute, 10*time.Second, "Rolled back Agent never became healthy") +} + +type CustomPGP struct { + PGP string + PGPUri string + PGPPath string +} +>>>>>>> b1d2e6b040 ([Bug] Do not fail when remote PGPs are not available (#3427)) From 93558d652dcc01a2876ffb949282d1d0b38d2c32 Mon Sep 17 00:00:00 2001 From: Michal Pristas Date: Thu, 21 Sep 2023 08:45:23 +0200 Subject: [PATCH 2/3] conflicts --- testing/integration/upgrade_test.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/testing/integration/upgrade_test.go b/testing/integration/upgrade_test.go index 2bb94ae277d..6d0c7f10ee6 100644 --- a/testing/integration/upgrade_test.go +++ b/testing/integration/upgrade_test.go @@ -989,8 +989,6 @@ func removePackageVersionFiles(t *testing.T, f *atesting.Fixture) { require.NoErrorf(t, err, "error removing package version file %q", vFile) } } -<<<<<<< HEAD -======= // TestStandaloneUpgradeFailsStatus tests the scenario where upgrading to a new version // of Agent fails due to the new Agent binary reporting an unhealthy status. It checks @@ -1094,4 +1092,3 @@ type CustomPGP struct { PGPUri string PGPPath string } ->>>>>>> b1d2e6b040 ([Bug] Do not fail when remote PGPs are not available (#3427)) From f24b5bdc964f775ba763638766bef7bb0d7ca41d Mon Sep 17 00:00:00 2001 From: Michal Pristas Date: Thu, 21 Sep 2023 16:10:42 +0200 Subject: [PATCH 3/3] skip IT --- testing/integration/upgrade_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/testing/integration/upgrade_test.go b/testing/integration/upgrade_test.go index 6d0c7f10ee6..79c6f3658c8 100644 --- a/testing/integration/upgrade_test.go +++ b/testing/integration/upgrade_test.go @@ -288,6 +288,8 @@ func TestStandaloneUpgradeWithGPGFallbackOneRemoteFailing(t *testing.T) { Sudo: true, // requires Agent installation }) + t.Skip("Fails upgrading to a version that doesn't exist: https://github.com/elastic/elastic-agent/issues/3397") + minVersion := version_8_10_0_SNAPSHOT fromVersion, err := version.ParseVersion(define.Version()) require.NoError(t, err)