Skip to content

Commit

Permalink
[uninstall] ensure service is stopped on windows (#4224) (#4266)
Browse files Browse the repository at this point in the history
* [uninstall] ensure service is stopped on windows

the kardianos service manager doesn't distinguish between
'Stopped' and 'StopPending' on Windows, so we need to query
to make sure the service is really stopped.  Otherwise we can
try to remove the files while the service is still running.

* Fixing linter issue + missing arg

* Update wait_service_windows.go

* incorporate feedback and add integration test

* add changelog fragment

---------

Co-authored-by: Pierre HILBERT <[email protected]>
(cherry picked from commit 82efe13)

Co-authored-by: Lee E Hinman <[email protected]>
  • Loading branch information
mergify[bot] and leehinman authored Feb 15, 2024
1 parent bba5f2c commit f2bb27d
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 2 deletions.
32 changes: 32 additions & 0 deletions changelog/fragments/1707857612-windows_stop_before_uninstall.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: bug-fix

# Change summary; an 80ish-character description of the change.
summary: On Windows make sure the service is stopped before uninstalling.

# Long description; in case the summary is not enough to describe the change
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; a word indicating the component this changeset affects.
component: elastic-agent

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
#pr: https://github.com/owner/repo/1234

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
16 changes: 14 additions & 2 deletions internal/pkg/agent/install/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,13 +54,25 @@ func Uninstall(cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *pr
if status == service.StatusRunning {
err := svc.Stop()
if err != nil {
pt.Describe("Failed to stop service")
pt.Describe("Failed to issue stop service")
return aerrors.New(
err,
fmt.Sprintf("failed to stop service (%s)", paths.ServiceName),
fmt.Sprintf("failed to issue stop service (%s)", paths.ServiceName),
aerrors.M("service", paths.ServiceName))
}
}
// The kardianos service manager can't tell the difference
// between 'Stopped' and 'StopPending' on Windows, so make
// sure the service is stopped.
err = isStopped(30*time.Second, 250*time.Millisecond, paths.ServiceName)
if err != nil {
pt.Describe("Failed to complete stop of service")
return aerrors.New(
err,
fmt.Sprintf("failed to complete stop service (%s)", paths.ServiceName),
aerrors.M("service", paths.ServiceName))
}

pt.Describe("Successfully stopped service")

// kill any running watcher
Expand Down
17 changes: 17 additions & 0 deletions internal/pkg/agent/install/wait_service_other.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

//go:build !windows

package install

import (
"time"
)

// isStopped is the non-Windows counterpart of the service stop wait.
// Waiting for the service to stop is not necessary on these systems,
// so the arguments are ignored and nil is returned immediately.
func isStopped(timeout, interval time.Duration, service string) error {
	// Nothing to wait for on this platform.
	return nil
}
60 changes: 60 additions & 0 deletions internal/pkg/agent/install/wait_service_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

//go:build windows

package install

import (
"fmt"
"time"

"golang.org/x/sys/windows/svc"
"golang.org/x/sys/windows/svc/mgr"
)

// isStopped queries the Windows service manager to see if the state
// of the service is stopped. It will repeat the query every
// 'interval' until the 'timeout' is reached. It returns nil if the
// system is stopped within the timeout period. An error is returned
// if the service doesn't stop before the timeout or if there are
// errors communicating with the service manager.
func isStopped(timeout time.Duration, interval time.Duration, service string) error {
var err error
var status svc.Status

m, err := mgr.Connect()
if err != nil {
return fmt.Errorf("failed to connect to service manager: %w", err)
}
defer func() {
_ = m.Disconnect()
}()

s, err := m.OpenService(service)
if err != nil {
return fmt.Errorf("failed to open service (%s): %w", service, err)
}
defer s.Close()

ticker := time.NewTicker(interval)
defer ticker.Stop()
timer := time.NewTimer(timeout)
defer timer.Stop()

for {
select {
case <-ticker.C:
status, err = s.Query()
if err != nil {
return fmt.Errorf("error querying service (%s): %w", service, err)
}
if status.State == svc.Stopped {
return nil
}
case <-timer.C:
return fmt.Errorf("timed out after %s waiting for service (%s) to stop, last state was: %d", timeout, service, status.State)
}
}
}
63 changes: 63 additions & 0 deletions testing/integration/install_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package integration

import (
"context"
"fmt"
"math/rand"
"os"
"os/exec"
Expand Down Expand Up @@ -167,6 +168,68 @@ func TestInstallWithBasePath(t *testing.T) {
}
}

// TestRepeatedInstallUninstall will install then uninstall the agent
// repeatedly. This test exists because of a number of race
// conditions that have occurred in the uninstall process. Current
// testing shows each iteration takes around 16 seconds.
//
// Each iteration runs as its own subtest with its own deadline of
// maxRunTime, so a single stuck install or uninstall fails that
// iteration rather than hanging the whole run.
func TestRepeatedInstallUninstall(t *testing.T) {
	define.Require(t, define.Requirements{
		Group: Default,
		// We require sudo for this test to run
		// `elastic-agent install` (even though it will
		// be installed as non-root).
		Sudo: true,

		// It's not safe to run this test locally as it
		// installs Elastic Agent.
		Local: false,
	})

	const (
		maxRunTime = 2 * time.Minute
		iterations = 100
	)

	// The default install location depends only on the OS, so compute
	// it once instead of on every iteration.
	var defaultBasePath string
	switch runtime.GOOS {
	case "darwin":
		defaultBasePath = `/Library`
	case "linux":
		defaultBasePath = `/opt`
	case "windows":
		defaultBasePath = `C:\Program Files`
	}
	topPath := filepath.Join(defaultBasePath, "Elastic", "Agent")

	for i := 0; i < iterations; i++ {
		t.Run(fmt.Sprintf("%s-%d", t.Name(), i), func(t *testing.T) {
			// Get path to Elastic Agent executable
			fixture, err := define.NewFixture(t, define.Version())
			require.NoError(t, err)

			ctx, cancel := testcontext.WithDeadline(t, context.Background(), time.Now().Add(maxRunTime))
			defer cancel()

			// Prepare the Elastic Agent so the binary is extracted and ready to use.
			err = fixture.Prepare(ctx)
			require.NoError(t, err)

			// Run `elastic-agent install`. We use `--force` to prevent interactive
			// execution.
			opts := &atesting.InstallOpts{Force: true}
			out, err := fixture.Install(ctx, opts)
			if err != nil {
				t.Logf("install output: %s", out)
				require.NoError(t, err)
			}

			// Check that Agent was installed in default base path
			checkInstallSuccess(t, topPath, opts.IsUnprivileged(runtime.GOOS))
			t.Run("check agent package version", testAgentPackageVersion(ctx, fixture, true))

			// NoErrorf already reports the error itself, so put the
			// command output in the message; it is what explains an
			// uninstall failure.
			out, err = fixture.Uninstall(ctx, &atesting.UninstallOpts{Force: true})
			require.NoErrorf(t, err, "uninstall failed, output: %s", out)
		})
	}
}

func checkInstallSuccess(t *testing.T, topPath string, unprivileged bool) {
t.Helper()
_, err := os.Stat(topPath)
Expand Down

0 comments on commit f2bb27d

Please sign in to comment.