Skip to content

Commit

Permalink
More watcher fixes.
Browse files Browse the repository at this point in the history
  • Loading branch information
blakerouse committed Oct 18, 2023
1 parent d5a02e1 commit 3d14b0e
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 10 deletions.
6 changes: 3 additions & 3 deletions internal/pkg/agent/application/upgrade/watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ type AgentWatcher struct {
func NewAgentWatcher(ch chan error, log *logger.Logger, checkInterval time.Duration) *AgentWatcher {
c := client.New()
ec := &AgentWatcher{
lastPid: -1,
notifyChan: ch,
agentClient: c,
log: log,
Expand Down Expand Up @@ -124,6 +123,7 @@ func (ch *AgentWatcher) Run(ctx context.Context) {

LOOP:
for {
ch.lastPid = -1
connectTimer := time.NewTimer(ch.checkInterval)
select {
case <-ctx.Done():
Expand Down Expand Up @@ -194,9 +194,9 @@ LOOP:
// we are now talking to a different spawned Elastic Agent
if ch.lastPid == -1 {
ch.lastPid = state.Info.PID
ch.log.Info("Communicating with PID %d", ch.lastPid)
ch.log.Info(fmt.Sprintf("Communicating with PID %d", ch.lastPid))
} else if ch.lastPid != state.Info.PID {
ch.log.Error("Communication with PID %d lost, now communicating with PID %d", ch.lastPid, state.Info.PID)
ch.log.Error(fmt.Sprintf("Communication with PID %d lost, now communicating with PID %d", ch.lastPid, state.Info.PID))
ch.lastPid = state.Info.PID
// count the PID change as a lost connection, but allow
// the communication to continue unless has become a failure
Expand Down
11 changes: 9 additions & 2 deletions testing/integration/upgrade_rollback_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ import (
"github.com/elastic/elastic-agent/testing/upgradetest"
)

// reallyFastWatcherCfg is a watcher configuration with a 1m grace period and
// a 5s error check interval.
const reallyFastWatcherCfg = `
agent.upgrade.watcher:
grace_period: 1m
error_check.interval: 5s
`

// TestStandaloneUpgradeRollback tests the scenario where upgrading to a new version
// of Agent fails due to the new Agent binary reporting an unhealthy status. It checks
// that the Agent is rolled back to the previous version.
Expand Down Expand Up @@ -165,7 +171,8 @@ func TestStandaloneUpgradeRollbackOnRestarts(t *testing.T) {

err = upgradetest.PerformUpgrade(
ctx, startFixture, endFixture, t,
upgradetest.WithPostUpgradeHook(postUpgradeHook))
upgradetest.WithPostUpgradeHook(postUpgradeHook),
upgradetest.WithCustomWatcherConfig(reallyFastWatcherCfg))
if !errors.Is(err, ErrPostExit) {
require.NoError(t, err)
}
Expand Down Expand Up @@ -203,7 +210,7 @@ func TestStandaloneUpgradeRollbackOnRestarts(t *testing.T) {
err = install.StartService(topPath)
require.NoError(t, err)

// ensure that it's started before starting it again
// ensure that it's started before next loop
require.Eventuallyf(t, func() bool {
status, statusErr = install.StatusService(topPath)
if statusErr != nil {
Expand Down
20 changes: 16 additions & 4 deletions testing/upgradetest/upgrader.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@ type CustomPGP struct {
type upgradeOpts struct {
sourceURI *string

skipVerify bool
skipDefaultPgp bool
customPgp *CustomPGP
skipVerify bool
skipDefaultPgp bool
customPgp *CustomPGP
customWatcherCfg string

preInstallHook func() error
postInstallHook func() error
Expand Down Expand Up @@ -98,6 +99,13 @@ func WithPostUpgradeHook(hook func() error) upgradeOpt {
}
}

// WithCustomWatcherConfig returns an upgrade option that records cfg as the
// watcher configuration to apply to the start fixture. When cfg is empty,
// PerformUpgrade keeps using its default fast watcher configuration.
func WithCustomWatcherConfig(cfg string) upgradeOpt {
	setWatcherCfg := func(o *upgradeOpts) {
		o.customWatcherCfg = cfg
	}
	return setWatcherCfg
}

// PerformUpgrade performs the upgrading of the Elastic Agent.
func PerformUpgrade(
ctx context.Context,
Expand Down Expand Up @@ -126,7 +134,11 @@ func PerformUpgrade(
}

// start fixture gets the agent configured with the custom watcher
// configuration when one was supplied, otherwise with the faster watcher
err = ConfigureFastWatcher(ctx, startFixture)
if upgradeOpts.customWatcherCfg != "" {
err = startFixture.Configure(ctx, []byte(upgradeOpts.customWatcherCfg))
} else {
err = ConfigureFastWatcher(ctx, startFixture)
}
if err != nil {
return fmt.Errorf("failed configuring the start agent with faster watcher configuration: %w", err)
}
Expand Down
2 changes: 1 addition & 1 deletion testing/upgradetest/watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (

// FastWatcherCfg is configuration that makes the watcher run faster.
const FastWatcherCfg = `
agent.upgradetest.watcher:
agent.upgrade.watcher:
grace_period: 1m
error_check.interval: 15s
crash_check.interval: 15s
Expand Down

0 comments on commit 3d14b0e

Please sign in to comment.