diff --git a/changelog/fragments/1695920792-Prevent-multiple-stops-of-services.yaml b/changelog/fragments/1695920792-Prevent-multiple-stops-of-services.yaml new file mode 100644 index 00000000000..e15f5d6e927 --- /dev/null +++ b/changelog/fragments/1695920792-Prevent-multiple-stops-of-services.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: bug-fix + +# Change summary; a 80ish characters long description of the change. +summary: Prevent multiple attempts to stop an already stopped service + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; a word indicating the component this changeset affects. +component: runtime + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +#issue: https://github.com/owner/repo/1234 diff --git a/pkg/component/runtime/manager.go b/pkg/component/runtime/manager.go index 42824794aea..8462ac3c17e 100644 --- a/pkg/component/runtime/manager.go +++ b/pkg/component/runtime/manager.go @@ -705,6 +705,7 @@ func (m *Manager) update(model component.Model, teardown bool) error { var stoppedWg sync.WaitGroup stoppedWg.Add(len(stop)) for _, existing := range stop { + m.logger.Debugf("Stopping component %q", existing.id) _ = existing.stop(teardown, model.Signed) // stop is async, wait for operation to finish, // otherwise new instance may be started and components @@ -755,6 +756,7 @@ func (m *Manager) waitForStopped(comp *componentRuntimeState) { for { latestState := comp.getLatest() if latestState.State == client.UnitStateStopped { + m.logger.Debugf("component %q stopped.", compID) return } @@ -767,6 +769,7 @@ func (m *Manager) waitForStopped(comp *componentRuntimeState) { select { case <-timeoutCh: + m.logger.Errorf("timeout exceeded waiting for component %q to stop", compID) return case <-time.After(stopCheckRetryPeriod): } diff --git a/pkg/component/runtime/runtime.go b/pkg/component/runtime/runtime.go index a04eec804bb..feeee2c6d7e 100644 --- a/pkg/component/runtime/runtime.go +++ b/pkg/component/runtime/runtime.go @@ -187,6 +187,10 @@ func (s *componentRuntimeState) start() error { } func (s *componentRuntimeState) stop(teardown bool, signed *component.Signed) error { + if s.shuttingDown.Load() { + // already stopping + return nil + } s.shuttingDown.Store(true) if teardown { return s.runtime.Teardown(signed) diff --git a/pkg/component/runtime/service.go b/pkg/component/runtime/service.go index 2ea47e2105c..5f007924d7f 100644 --- a/pkg/component/runtime/service.go +++ b/pkg/component/runtime/service.go @@ -38,6 +38,7 @@ var ( type executeServiceCommandFunc func(ctx context.Context, log *logger.Logger, binaryPath string, spec *component.ServiceOperationsCommandSpec) error // serviceRuntime provides the command runtime for running a component as a service. +// an instance of serviceRuntime is not reused: after being stopped, it cannot be started again. type serviceRuntime struct { comp component.Component log *logger.Logger @@ -124,6 +125,8 @@ func (s *serviceRuntime) Run(ctx context.Context, comm Communicator) (err error) lastCheckin time.Time missedCheckins int tearingDown bool + // flag that signals if we are already stopping + stopping bool ignoreCheckins bool ) @@ -136,6 +139,13 @@ func (s *serviceRuntime) Run(ctx context.Context, comm Communicator) (err error) defer cisStop() onStop := func(am actionMode) { + if stopping { + s.log.Debugf("service %s is already stopping: skipping...", s.name()) + return + } + // the flag is set once and never reset since the serviceRuntime object + // is not supposed to be reused once it's stopping + stopping = true // Stop check-in timer s.log.Debugf("stop check-in timer for %s service", s.name()) checkinTimer.Stop()