Skip to content

Commit

Permalink
Fix double stop components (elastic#3482)
Browse files Browse the repository at this point in the history
* Skip stopping already stopped components
  • Loading branch information
pchila authored Oct 23, 2023
1 parent 3d7b3c1 commit 97d9c80
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Kind can be one of:
# - breaking-change: a change to previously-documented behavior
# - deprecation: functionality that is being removed in a later release
# - bug-fix: fixes a problem in a previous version
# - enhancement: extends functionality but does not break or fix existing behavior
# - feature: new functionality
# - known-issue: problems that we are aware of in a given version
# - security: impacts on the security of a product or a user’s deployment.
# - upgrade: important information for someone upgrading from a prior version
# - other: does not fit into any of the other categories
kind: bug-fix

# Change summary; an 80ish-character description of the change.
summary: Prevent multiple attempts to stop an already stopped service

# Long description; in case the summary is not enough to describe the change,
# this field accommodates a description without length limits.
# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
#description:

# Affected component; a word indicating the component this changeset affects.
component: runtime

# PR URL; optional; the PR number that added the changeset.
# If not present, it is automatically filled by the tooling, which finds the PR where this changelog fragment was added.
# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
# Please provide it if you are adding a fragment for a different PR.
#pr: https://github.com/owner/repo/1234

# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
# If not present, it is automatically filled by the tooling with the issue linked to the PR number.
#issue: https://github.com/owner/repo/1234
3 changes: 3 additions & 0 deletions pkg/component/runtime/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,7 @@ func (m *Manager) update(model component.Model, teardown bool) error {
var stoppedWg sync.WaitGroup
stoppedWg.Add(len(stop))
for _, existing := range stop {
m.logger.Debugf("Stopping component %q", existing.id)
_ = existing.stop(teardown, model.Signed)
// stop is async, wait for operation to finish,
// otherwise new instance may be started and components
Expand Down Expand Up @@ -755,6 +756,7 @@ func (m *Manager) waitForStopped(comp *componentRuntimeState) {
for {
latestState := comp.getLatest()
if latestState.State == client.UnitStateStopped {
m.logger.Debugf("component %q stopped.", compID)
return
}

Expand All @@ -767,6 +769,7 @@ func (m *Manager) waitForStopped(comp *componentRuntimeState) {

select {
case <-timeoutCh:
m.logger.Errorf("timeout exceeded waiting for component %q to stop", compID)
return
case <-time.After(stopCheckRetryPeriod):
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/component/runtime/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ func (s *componentRuntimeState) start() error {
}

func (s *componentRuntimeState) stop(teardown bool, signed *component.Signed) error {
if s.shuttingDown.Load() {
// already stopping
return nil
}
s.shuttingDown.Store(true)
if teardown {
return s.runtime.Teardown(signed)
Expand Down
10 changes: 10 additions & 0 deletions pkg/component/runtime/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ var (
type executeServiceCommandFunc func(ctx context.Context, log *logger.Logger, binaryPath string, spec *component.ServiceOperationsCommandSpec) error

// serviceRuntime provides the command runtime for running a component as a service.
// An instance of serviceRuntime is not reused: after being stopped, it cannot be started again.
type serviceRuntime struct {
comp component.Component
log *logger.Logger
Expand Down Expand Up @@ -124,6 +125,8 @@ func (s *serviceRuntime) Run(ctx context.Context, comm Communicator) (err error)
lastCheckin time.Time
missedCheckins int
tearingDown bool
// flag that signals if we are already stopping
stopping bool
ignoreCheckins bool
)

Expand All @@ -136,6 +139,13 @@ func (s *serviceRuntime) Run(ctx context.Context, comm Communicator) (err error)
defer cisStop()

onStop := func(am actionMode) {
if stopping {
s.log.Debugf("service %s is already stopping: skipping...", s.name())
return
}
// the flag is set once and never reset since the serviceRuntime object
// is not supposed to be reused once it's stopping
stopping = true
// Stop check-in timer
s.log.Debugf("stop check-in timer for %s service", s.name())
checkinTimer.Stop()
Expand Down

0 comments on commit 97d9c80

Please sign in to comment.