Skip to content

Commit

Permalink
Merge branch 'main' of github.com:elastic/elastic-agent into feat/sta…
Browse files Browse the repository at this point in the history
…rt-server-when-mon-enabled
  • Loading branch information
michalpristas committed Oct 6, 2023
2 parents 3372c5f + 33c6934 commit c733556
Show file tree
Hide file tree
Showing 27 changed files with 430 additions and 314 deletions.
5 changes: 3 additions & 2 deletions .buildkite/hooks/pre-command
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ DOCKER_REGISTRY="docker.elastic.co"
DOCKER_REGISTRY_SECRET_PATH="kv/ci-shared/platform-ingest/docker_registry_prod"
CI_DRA_ROLE_PATH="kv/ci-shared/release/dra-role"
CI_GCP_OBS_PATH="kv/ci-shared/observability-ingest/cloud/gcp"
CI_AGENT_QA_OBS_PATH="kv/ci-shared/observability-ingest/elastic-agent-ess-qa"
# CI_AGENT_QA_OBS_PATH="kv/ci-shared/observability-ingest/elastic-agent-ess-qa"
CI_ESS_STAGING_PATH="kv/ci-shared/platform-ingest/platform-ingest-ec-staging"
CI_DRA_ROLE_PATH="kv/ci-shared/release/dra-role"


Expand Down Expand Up @@ -54,7 +55,7 @@ if [[ "$BUILDKITE_PIPELINE_SLUG" == "elastic-agent" && "$BUILDKITE_STEP_KEY" ==
export TEST_INTEG_AUTH_GCP_SERVICE_TOKEN_FILE=$(realpath ./gcp.json)

# ESS credentials
export API_KEY_TOKEN=$(vault kv get -field api_key ${CI_AGENT_QA_OBS_PATH})
export API_KEY_TOKEN=$(vault kv get -field apiKey ${CI_ESS_STAGING_PATH})
echo ${API_KEY_TOKEN} > ./apiKey
export TEST_INTEG_AUTH_ESS_APIKEY_FILE=$(realpath ./apiKey)
fi
Expand Down
2 changes: 1 addition & 1 deletion .buildkite/scripts/steps/integration_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ AGENT_PACKAGE_VERSION="${OVERRIDE_AGENT_PACKAGE_VERSION}" DEV=true EXTERNAL=true

# Run integration tests
set +e
AGENT_VERSION="${OVERRIDE_TEST_AGENT_VERSION}" TEST_INTEG_AUTH_ESS_REGION=azure-eastus2 TEST_INTEG_CLEAN_ON_EXIT=true SNAPSHOT=true mage integration:test
AGENT_VERSION="${OVERRIDE_TEST_AGENT_VERSION}" TEST_INTEG_CLEAN_ON_EXIT=true SNAPSHOT=true mage integration:test
TESTS_EXIT_STATUS=$?
set -e

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: bug-fix

# Change summary; a 80ish characters long description of the change.
summary: Surface errors during Agent's enroll process, failing if any happens.

# Long description; in case the summary is not enough to describe the change
# this field accommodate a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; a word indicating the component this changeset affects.
component: install/enroll

# PR URL; optional; the PR number that added the changeset.
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/3207

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present is automatically filled by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
2 changes: 1 addition & 1 deletion docs/test-framework-dev-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Go version should be at least the same than the one in [.go-version](https://git

### Configuration

ESS (QA) API Key to create on https://console.qa.cld.elstc.co/deployment-features/keys
ESS (staging) API Key to create on https://staging.found.no/account/keys

Warning: if you never created a deployment on it, you won't have permission to get this key, so you will need to create one first.

Expand Down
2 changes: 1 addition & 1 deletion internal/pkg/agent/cmd/enroll.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ func enroll(streams *cli.IOStreams, cmd *cobra.Command) error {
// Error: failed to fix permissions: chown /Library/Elastic/Agent/data/elastic-agent-c13f91/elastic-agent.app: operation not permitted
// This is because we are fixing permissions twice, once during installation and again during the enrollment step.
// When we are enrolling as part of installation on MacOS, skip the second attempt to fix permissions.
var fixPermissions bool = fromInstall
fixPermissions := fromInstall
if runtime.GOOS == "darwin" {
fixPermissions = false
}
Expand Down
62 changes: 45 additions & 17 deletions internal/pkg/agent/cmd/enroll_cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ func newEnrollCmd(
)
}

// newEnrollCmdWithStore creates an new enrollment and accept a custom store.
// newEnrollCmdWithStore creates a new enrollment and accept a custom store.
func newEnrollCmdWithStore(
log *logger.Logger,
options *enrollCmdOption,
Expand All @@ -187,10 +187,11 @@ func newEnrollCmdWithStore(
}, nil
}

// Execute tries to enroll the agent into Fleet.
// Execute enrolls the agent into Fleet.
func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {
var err error
defer c.stopAgent() // ensure its stopped no matter what

span, ctx := apm.StartSpan(ctx, "enroll", "app.internal")
defer func() {
apm.CaptureError(ctx, err).Send()
Expand Down Expand Up @@ -235,7 +236,7 @@ func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {
// Ensure that the agent does not use a proxy configuration
// when connecting to the local fleet server.
// Note that when running fleet-server the enroll request will be sent to :8220,
// however when the agent is running afterwards requests will be sent to :8221
// however when the agent is running afterward requests will be sent to :8221
c.remoteConfig.Transport.Proxy.Disable = true
}

Expand All @@ -256,7 +257,7 @@ func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {

err = c.enrollWithBackoff(ctx, persistentConfig)
if err != nil {
return errors.New(err, "fail to enroll")
return fmt.Errorf("fail to enroll: %w", err)
}

if c.options.FixPermissions {
Expand All @@ -267,17 +268,23 @@ func (c *enrollCmd) Execute(ctx context.Context, streams *cli.IOStreams) error {
}

defer func() {
fmt.Fprintln(streams.Out, "Successfully enrolled the Elastic Agent.")
if err != nil {
fmt.Fprintf(streams.Err, "Something went wrong while enrolling the Elastic Agent: %v\n", err)
} else {
fmt.Fprintln(streams.Out, "Successfully enrolled the Elastic Agent.")
}
}()

if c.agentProc == nil {
if err := c.daemonReload(ctx); err != nil {
c.log.Infow("Elastic Agent might not be running; unable to trigger restart", "error", err)
} else {
c.log.Info("Successfully triggered restart on running Elastic Agent.")
if err = c.daemonReloadWithBackoff(ctx); err != nil {
c.log.Errorf("Elastic Agent might not be running; unable to trigger restart: %v", err)
return fmt.Errorf("could not reload agent daemon, unable to trigger restart: %w", err)
}

c.log.Info("Successfully triggered restart on running Elastic Agent.")
return nil
}

c.log.Info("Elastic Agent has been enrolled; start Elastic Agent")
return nil
}
Expand Down Expand Up @@ -443,24 +450,32 @@ func (c *enrollCmd) prepareFleetTLS() error {

func (c *enrollCmd) daemonReloadWithBackoff(ctx context.Context) error {
err := c.daemonReload(ctx)
if err != nil &&
(errors.Is(err, context.DeadlineExceeded) ||
errors.Is(err, context.Canceled)) {
return fmt.Errorf("could not reload deamon: %w", err)
}
if err == nil {
return nil
}

signal := make(chan struct{})
backExp := backoff.NewExpBackoff(signal, 10*time.Second, 1*time.Minute)

for i := 5; i >= 0; i-- {
var i int
for ; i < 5; i++ {
backExp.Wait()
c.log.Info("Retrying to restart...")
err = c.daemonReload(ctx)
if err == nil {
if err == nil ||
errors.Is(err, context.DeadlineExceeded) ||
errors.Is(err, context.Canceled) {
break
}
}

close(signal)
return err
return fmt.Errorf("could not reload deamon after %d retries: %w", i+1, err)
}

func (c *enrollCmd) daemonReload(ctx context.Context) error {
Expand All @@ -478,8 +493,20 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[

c.log.Infof("Starting enrollment to URL: %s", c.client.URI())
err := c.enroll(ctx, persistentConfig)
if err == nil {
return nil
}

const deadline = 10 * time.Minute
const frequency = 60 * time.Second

c.log.Infof("1st enrollment attempt failed, retrying for %s, every %s enrolling to URL: %s",
deadline,
frequency,
c.client.URI())
signal := make(chan struct{})
backExp := backoff.NewExpBackoff(signal, 60*time.Second, 10*time.Minute)
defer close(signal)
backExp := backoff.NewExpBackoff(signal, frequency, deadline)

for {
retry := false
Expand All @@ -498,7 +525,6 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[
err = c.enroll(ctx, persistentConfig)
}

close(signal)
return err
}

Expand Down Expand Up @@ -547,8 +573,10 @@ func (c *enrollCmd) enroll(ctx context.Context, persistentConfig map[string]inte
c.options.FleetServer.ElasticsearchInsecure,
)
if err != nil {
return err
return fmt.Errorf(
"failed creating fleet-server bootstrap config: %w", err)
}

// no longer need bootstrap at this point
serverConfig.Server.Bootstrap = false
fleetConfig.Server = serverConfig.Server
Expand All @@ -568,11 +596,11 @@ func (c *enrollCmd) enroll(ctx context.Context, persistentConfig map[string]inte

reader, err := yamlToReader(configToStore)
if err != nil {
return err
return fmt.Errorf("yamlToReader failed: %w", err)
}

if err := safelyStoreAgentInfo(c.configStore, reader); err != nil {
return err
return fmt.Errorf("failed to store agent config: %w", err)
}

// clear action store
Expand Down
71 changes: 50 additions & 21 deletions internal/pkg/agent/cmd/enroll_cmd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@ import (
"os"
"runtime"
"strconv"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"
Expand Down Expand Up @@ -159,14 +162,24 @@ func TestEnroll(t *testing.T) {
require.NoError(t, err)

streams, _, _, _ := cli.NewTestingIOStreams()
err = cmd.Execute(context.Background(), streams)
require.NoError(t, err)
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
err = cmd.Execute(ctx, streams)

if err != nil &&
// There is no agent running, therefore nothing to be restarted.
// However, this will cause the Enroll command to return an error
// which we'll ignore here.
!strings.Contains(err.Error(),
"could not reload agent daemon, unable to trigger restart") {
t.Fatalf("enrrol coms returned and unexpected error: %v", err)
}

config, err := readConfig(store.Content)

require.NoError(t, err)
require.Equal(t, "my-access-api-key", config.AccessAPIKey)
require.Equal(t, host, config.Client.Host)

assert.Equal(t, "my-access-api-key", config.AccessAPIKey)
assert.Equal(t, host, config.Client.Host)
},
))

Expand Down Expand Up @@ -216,16 +229,24 @@ func TestEnroll(t *testing.T) {
require.NoError(t, err)

streams, _, _, _ := cli.NewTestingIOStreams()
err = cmd.Execute(context.Background(), streams)
require.NoError(t, err)

require.True(t, store.Called)

ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
err = cmd.Execute(ctx, streams)
if err != nil &&
// There is no agent running, therefore nothing to be restarted.
// However, this will cause the Enroll command to return an error
// which we'll ignore here.
!strings.Contains(err.Error(),
"could not reload agent daemon, unable to trigger restart") {
t.Fatalf("enrrol coms returned and unexpected error: %v", err)
}

assert.True(t, store.Called)
config, err := readConfig(store.Content)

require.NoError(t, err)
require.Equal(t, "my-access-api-key", config.AccessAPIKey)
require.Equal(t, host, config.Client.Host)
assert.NoError(t, err)
assert.Equal(t, "my-access-api-key", config.AccessAPIKey)
assert.Equal(t, host, config.Client.Host)
},
))

Expand Down Expand Up @@ -275,16 +296,24 @@ func TestEnroll(t *testing.T) {
require.NoError(t, err)

streams, _, _, _ := cli.NewTestingIOStreams()
err = cmd.Execute(context.Background(), streams)
require.NoError(t, err)

require.True(t, store.Called)

ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
err = cmd.Execute(ctx, streams)

if err != nil &&
// There is no agent running, therefore nothing to be restarted.
// However, this will cause the Enroll command to return an error
// which we'll ignore here.
!strings.Contains(err.Error(),
"could not reload agent daemon, unable to trigger restart") {
t.Fatalf("enrrol coms returned and unexpected error: %v", err)
}

assert.True(t, store.Called)
config, err := readConfig(store.Content)

require.NoError(t, err)
require.Equal(t, "my-access-api-key", config.AccessAPIKey)
require.Equal(t, host, config.Client.Host)
assert.Equal(t, "my-access-api-key", config.AccessAPIKey)
assert.Equal(t, host, config.Client.Host)
},
))

Expand Down
4 changes: 3 additions & 1 deletion internal/pkg/agent/cmd/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
return fmt.Errorf("problem reading prompt response")
}
if url == "" {
fmt.Fprintf(streams.Out, "Enrollment cancelled because no URL was provided.\n")
fmt.Fprintln(streams.Out, "Enrollment cancelled because no URL was provided.")
return nil
}
}
Expand Down Expand Up @@ -224,6 +224,8 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error {
}
}()
}

fmt.Fprintln(streams.Out, "Elastic Agent successfully installed, starting enrollment.")
}

if enroll {
Expand Down
Loading

0 comments on commit c733556

Please sign in to comment.