Skip to content

Commit

Permalink
Merge branch 'main' of github.com:elastic/elastic-agent into checkin-…
Browse files Browse the repository at this point in the history
…cleanup
  • Loading branch information
faec committed Sep 14, 2023
2 parents 9e1aa95 + 72ce0fe commit ce535cf
Show file tree
Hide file tree
Showing 13 changed files with 114 additions and 55 deletions.
32 changes: 32 additions & 0 deletions changelog/fragments/1694190260-improve-upgrade-logging.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: enhancement

# Change summary; a 80ish characters long description of the change.
summary: Improve logging during Agent upgrades

# Long description; in case the summary is not enough to describe the change
# this field accommodate a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
component: elastic-agent

# PR URL; optional; the PR number that added the changeset.
# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
pr: https://github.com/elastic/elastic-agent/pull/3382

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present is automatically filled by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
2 changes: 1 addition & 1 deletion internal/pkg/agent/application/upgrade/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
// cleanNonMatchingVersionsFromDownloads will remove files that do not have the passed version number from the downloads directory.
func cleanNonMatchingVersionsFromDownloads(log *logger.Logger, version string) error {
downloadsPath := paths.Downloads()
log.Debugw("Cleaning up non-matching downloaded versions", "version", version, "downloads.path", downloadsPath)
log.Infow("Cleaning up non-matching downloaded versions", "version", version, "downloads.path", downloadsPath)

files, err := os.ReadDir(downloadsPath)
if os.IsNotExist(err) {
Expand Down
8 changes: 3 additions & 5 deletions internal/pkg/agent/application/upgrade/crash_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ package upgrade
import (
"context"
"fmt"
"os"
"sync"
"time"

Expand Down Expand Up @@ -53,7 +52,7 @@ func NewCrashChecker(ctx context.Context, ch chan error, log *logger.Logger, che
return nil, err
}

log.Debugf("running checks using '%s' controller", c.sc.Name())
log.Infof("running checks using '%s' controller", c.sc.Name())

return c, nil
}
Expand All @@ -62,9 +61,8 @@ func NewCrashChecker(ctx context.Context, ch chan error, log *logger.Logger, che
func (ch *CrashChecker) Run(ctx context.Context) {
defer ch.sc.Close()

ch.log.Debug("Crash checker started")
ch.log.Info("Crash checker started")
for {
ch.log.Debugf("watcher having PID: %d", os.Getpid())
t := time.NewTimer(ch.checkInterval)

select {
Expand All @@ -77,7 +75,7 @@ func (ch *CrashChecker) Run(ctx context.Context) {
ch.log.Error(err)
}

ch.log.Debugf("retrieved service PID [%d]", pid)
ch.log.Infof("retrieved service PID [%d]", pid)
ch.q.Push(pid)

// We decide if the Agent process has crashed in either of
Expand Down
2 changes: 1 addition & 1 deletion internal/pkg/agent/application/upgrade/error_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func NewErrorChecker(ch chan error, log *logger.Logger, checkInterval time.Durat

// Run runs the checking loop.
func (ch *ErrorChecker) Run(ctx context.Context) {
ch.log.Debug("Error checker started")
ch.log.Info("Error checker started")
for {
t := time.NewTimer(ch.checkInterval)
select {
Expand Down
22 changes: 15 additions & 7 deletions internal/pkg/agent/application/upgrade/rollback.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ func Rollback(ctx context.Context, log *logger.Logger, prevHash string, currentH

// Cleanup removes all artifacts and files related to a specified version.
func Cleanup(log *logger.Logger, currentHash string, removeMarker bool, keepLogs bool) error {
log.Debugw("Cleaning up upgrade", "hash", currentHash, "remove_marker", removeMarker)
log.Infow("Cleaning up upgrade", "hash", currentHash, "remove_marker", removeMarker)
<-time.After(afterRestartDelay)

// remove upgrade marker
Expand All @@ -78,7 +78,7 @@ func Cleanup(log *logger.Logger, currentHash string, removeMarker bool, keepLogs

// remove symlink to avoid upgrade failures, ignore error
prevSymlink := prevSymlinkPath()
log.Debugw("Removing previous symlink path", "file.path", prevSymlinkPath())
log.Infow("Removing previous symlink path", "file.path", prevSymlinkPath())
_ = os.Remove(prevSymlink)

dirPrefix := fmt.Sprintf("%s-", agentName)
Expand All @@ -93,7 +93,7 @@ func Cleanup(log *logger.Logger, currentHash string, removeMarker bool, keepLogs
}

hashedDir := filepath.Join(paths.Data(), dir)
log.Debugw("Removing hashed data directory", "file.path", hashedDir)
log.Infow("Removing hashed data directory", "file.path", hashedDir)
var ignoredDirs []string
if keepLogs {
ignoredDirs = append(ignoredDirs, "logs")
Expand All @@ -110,20 +110,28 @@ func Cleanup(log *logger.Logger, currentHash string, removeMarker bool, keepLogs
// agent during upgrade period.
func InvokeWatcher(log *logger.Logger) error {
if !IsUpgradeable() {
log.Debug("agent is not upgradable, not starting watcher")
log.Info("agent is not upgradable, not starting watcher")
return nil
}

cmd := invokeCmd()
defer func() {
if cmd.Process != nil {
log.Debugf("releasing watcher %v", cmd.Process.Pid)
log.Infof("releasing watcher %v", cmd.Process.Pid)
_ = cmd.Process.Release()
}
}()

log.Debugw("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir)
return cmd.Start()
log.Infow("Starting upgrade watcher", "path", cmd.Path, "args", cmd.Args, "env", cmd.Env, "dir", cmd.Dir)
if err := cmd.Start(); err != nil {
return fmt.Errorf("failed to start Upgrade Watcher: %w", err)
}

upgradeWatcherPID := cmd.Process.Pid
agentPID := os.Getpid()
log.Infow("Upgrade Watcher invoked", "agent.upgrade.watcher.process.pid", upgradeWatcherPID, "agent.process.pid", agentPID)

return nil
}

func restartAgent(ctx context.Context, log *logger.Logger) error {
Expand Down
4 changes: 2 additions & 2 deletions internal/pkg/agent/application/upgrade/step_download.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (u *Upgrader) downloadArtifact(ctx context.Context, version, sourceURI stri
}
}

u.log.Debugw("Downloading upgrade artifact", "version", version,
u.log.Infow("Downloading upgrade artifact", "version", version,
"source_uri", settings.SourceURI, "drop_path", settings.DropPath,
"target_path", settings.TargetDirectory, "install_path", settings.InstallPath)

Expand Down Expand Up @@ -162,7 +162,7 @@ func (u *Upgrader) downloadWithRetries(

opFn := func() error {
attempt++
u.log.Debugf("download attempt %d", attempt)
u.log.Infof("download attempt %d", attempt)

downloader, err := downloaderCtor(version, u.log, settings)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion internal/pkg/agent/application/upgrade/step_mark.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func UpdateActiveCommit(log *logger.Logger, hash string) error {
// CleanMarker removes a marker from disk.
func CleanMarker(log *logger.Logger) error {
markerFile := markerFilePath()
log.Debugw("Removing marker file", "file.path", markerFile)
log.Infow("Removing marker file", "file.path", markerFile)
if err := os.Remove(markerFile); !os.IsNotExist(err) {
return err
}
Expand Down
12 changes: 6 additions & 6 deletions internal/pkg/agent/application/upgrade/upgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ import (
"path/filepath"
"strings"

"github.com/elastic/elastic-agent/internal/pkg/config"

"github.com/otiai10/copy"
"go.elastic.co/apm"

"github.com/elastic/elastic-agent/internal/pkg/config"

"github.com/elastic/elastic-agent/internal/pkg/agent/application/info"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
"github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec"
Expand Down Expand Up @@ -172,7 +172,7 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, sourceURI string
cb := shutdownCallback(u.log, paths.Home(), release.Version(), version, release.TrimCommit(newHash))

// Clean everything from the downloads dir
u.log.Debugw("Removing downloads directory", "file.path", paths.Downloads())
u.log.Infow("Removing downloads directory", "file.path", paths.Downloads())
err = os.RemoveAll(paths.Downloads())
if err != nil {
u.log.Errorw("Unable to clean downloads after update", "error.message", err, "file.path", paths.Downloads())
Expand Down Expand Up @@ -231,11 +231,11 @@ func copyActionStore(log *logger.Logger, newHash string) error {
// copies legacy action_store.yml, state.yml and state.enc encrypted file if exists
storePaths := []string{paths.AgentActionStoreFile(), paths.AgentStateStoreYmlFile(), paths.AgentStateStoreFile()}
newHome := filepath.Join(filepath.Dir(paths.Home()), fmt.Sprintf("%s-%s", agentName, newHash))
log.Debugw("Copying action store", "new_home_path", newHome)
log.Infow("Copying action store", "new_home_path", newHome)

for _, currentActionStorePath := range storePaths {
newActionStorePath := filepath.Join(newHome, filepath.Base(currentActionStorePath))
log.Debugw("Copying action store path", "from", currentActionStorePath, "to", newActionStorePath)
log.Infow("Copying action store path", "from", currentActionStorePath, "to", newActionStorePath)
currentActionStore, err := os.ReadFile(currentActionStorePath)
if os.IsNotExist(err) {
// nothing to copy
Expand Down Expand Up @@ -266,7 +266,7 @@ func copyRunDirectory(log *logger.Logger, newHash string) error {
err := copyDir(log, oldRunPath, newRunPath, true)
if os.IsNotExist(err) {
// nothing to copy, operation ok
log.Debugw("Run directory not present", "old_run_path", oldRunPath)
log.Infow("Run directory not present", "old_run_path", oldRunPath)
return nil
}
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions internal/pkg/agent/cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ func run(override cfgOverrider, testingMode bool, fleetInitTimeout time.Duration
"source": agentName,
})

l.Infow("Elastic Agent started", "process.pid", os.Getpid(), "agent.version", version.GetAgentPackageVersion())

cfg, err = tryDelayEnroll(ctx, l, cfg, override)
if err != nil {
err = errors.New(err, "failed to perform delayed enrollment")
Expand Down
9 changes: 6 additions & 3 deletions internal/pkg/agent/cmd/watch.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ import (
"syscall"
"time"

"github.com/elastic/elastic-agent/version"

"github.com/elastic/elastic-agent/internal/pkg/config"

"github.com/spf13/cobra"
Expand Down Expand Up @@ -62,21 +64,22 @@ func newWatchCommandWithArgs(_ []string, streams *cli.IOStreams) *cobra.Command
}

func watchCmd(log *logp.Logger, cfg *configuration.Configuration) error {
log.Infow("Upgrade Watcher started", "process.pid", os.Getpid(), "agent.version", version.GetAgentPackageVersion())
marker, err := upgrade.LoadMarker()
if err != nil {
log.Error("failed to load marker", err)
return err
}
if marker == nil {
// no marker found we're not in upgrade process
log.Debugf("update marker not present at '%s'", paths.Data())
log.Infof("update marker not present at '%s'", paths.Data())
return nil
}

locker := filelock.NewAppLocker(paths.Top(), watcherLockFile)
if err := locker.TryLock(); err != nil {
if errors.Is(err, filelock.ErrAppAlreadyRunning) {
log.Debugf("exiting, lock already exists")
log.Info("exiting, lock already exists")
return nil
}

Expand All @@ -89,7 +92,7 @@ func watchCmd(log *logp.Logger, cfg *configuration.Configuration) error {

isWithinGrace, tilGrace := gracePeriod(marker, cfg.Settings.Upgrade.Watcher.GracePeriod)
if !isWithinGrace {
log.Debugf("not within grace [updatedOn %v] %v", marker.UpdatedOn.String(), time.Since(marker.UpdatedOn).String())
log.Infof("not within grace [updatedOn %v] %v", marker.UpdatedOn.String(), time.Since(marker.UpdatedOn).String())
// if it is started outside of upgrade loop
// if we're not within grace and marker is still there it might mean
// that cleanup was not performed ok, cleanup everything except current version
Expand Down
15 changes: 15 additions & 0 deletions internal/pkg/agent/install/testblocking/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package main

import (
"math"
"time"
)

// Simple program that blocks forever to ensure exes running from a directory on Windows can be removed during uninstall.
func main() {
<-time.After(time.Duration(math.MaxInt64))
}
44 changes: 21 additions & 23 deletions internal/pkg/agent/install/uninstall_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,44 +12,42 @@ import (
"path/filepath"
"testing"

"github.com/otiai10/copy"
"github.com/stretchr/testify/require"
)

const simpleBlockForever = `
package main
import (
"math"
"time"
)
func main() {
<-time.After(time.Duration(math.MaxInt64))
}
`

func TestRemovePath(t *testing.T) {
dir := filepath.Join(t.TempDir(), "subdir")
err := os.Mkdir(dir, 0644)
var (
pkgName = "testblocking"
binaryName = pkgName + ".exe"
)

// Create a temporary directory that we can safely remove. The directory is created as a new
// sub-directory. This avoids having Microsoft Defender quarantine the file if it is exec'd from
// the default temporary directory.
destDir, err := os.MkdirTemp(pkgName, t.Name())
require.NoError(t, err)

src := filepath.Join(dir, "main.go")
err = os.WriteFile(src, []byte(simpleBlockForever), 0644)
require.NoError(t, err)
// Copy the test executable to the new temporary directory.
destpath, err := filepath.Abs(filepath.Join(destDir, binaryName))
require.NoErrorf(t, err, "failed dest abs %s + %s", destDir, binaryName)

srcPath, err := filepath.Abs(filepath.Join(pkgName, binaryName))
require.NoErrorf(t, err, "failed src abs %s + %s", pkgName, binaryName)

binary := filepath.Join(dir, "main.exe")
cmd := exec.Command("go", "build", "-o", binary, src)
_, err = cmd.CombinedOutput()
err = copy.Copy(srcPath, destpath, copy.Options{Sync: true})
require.NoError(t, err)

cmd = exec.Command(binary)
// Execute the test executable asynchronously.
cmd := exec.Command(destpath)
err = cmd.Start()
require.NoError(t, err)
defer func() {
_ = cmd.Process.Kill()
_ = cmd.Wait()
}()

err = RemovePath(dir)
// Ensure the directory containing the executable can be removed.
err = RemovePath(destDir)
require.NoError(t, err)
}
15 changes: 9 additions & 6 deletions magefile.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,16 +256,19 @@ func (Build) Clean() {
// TestBinaries build the required binaries for the test suite.
func (Build) TestBinaries() error {
wd, _ := os.Getwd()
p := filepath.Join(wd, "pkg", "component", "fake")
for _, name := range []string{"component", "shipper"} {
binary := name
testBinaryPkgs := []string{
filepath.Join(wd, "pkg", "component", "fake", "component"),
filepath.Join(wd, "pkg", "component", "fake", "shipper"),
filepath.Join(wd, "internal", "pkg", "agent", "install", "testblocking"),
}
for _, pkg := range testBinaryPkgs {
binary := filepath.Base(pkg)
if runtime.GOOS == "windows" {
binary += ".exe"
}

fakeDir := filepath.Join(p, name)
outputName := filepath.Join(fakeDir, binary)
err := RunGo("build", "-o", outputName, filepath.Join(fakeDir))
outputName := filepath.Join(pkg, binary)
err := RunGo("build", "-o", outputName, filepath.Join(pkg))
if err != nil {
return err
}
Expand Down

0 comments on commit ce535cf

Please sign in to comment.