From 2508f850c8b247c529dd2c5c1c104d2265911676 Mon Sep 17 00:00:00 2001 From: Quinten <67589015+QuintenQVD0@users.noreply.github.com> Date: Sun, 27 Oct 2024 01:21:00 +0200 Subject: [PATCH] Wings: Fix stoplogic (#31) * Wings: FIx stoplogic * os.kill should fall under default * handle `^C` here and not on the panel side * change the signal logic and move the panel status call bellow the crash detection * allow for setting the stop to SIGKILL and add an extra comment * Move the default kill signal in the switch statement --- environment/docker/power.go | 46 ++++++++++++++++++++++++++----------- environment/environment.go | 3 +-- server/power.go | 3 +-- server/server.go | 16 ++++++------- 4 files changed, 43 insertions(+), 25 deletions(-) diff --git a/environment/docker/power.go b/environment/docker/power.go index 147ef84d..b71e787a 100644 --- a/environment/docker/power.go +++ b/environment/docker/power.go @@ -4,7 +4,6 @@ import ( "context" "os" "strings" - "syscall" "time" "emperror.dev/errors" @@ -148,16 +147,17 @@ func (e *Environment) Stop(ctx context.Context) error { log.WithField("container_id", e.Id).Warn("no stop configuration detected for environment, using termination procedure") } - signal := os.Kill - // Handle a few common cases, otherwise just fall through and just pass along - // the os.Kill signal to the process. + var signal string + // Handle a few common cases, otherwise just fall through and use the default SIGKILL. switch strings.ToUpper(s.Value) { case "SIGABRT": - signal = syscall.SIGABRT + signal = "SIGABRT" case "SIGINT": - signal = syscall.SIGINT - case "SIGTERM": - signal = syscall.SIGTERM + signal = "SIGINT" + case "SIGTERM", "C": + signal = "SIGTERM" + default: + signal = "SIGKILL" } return e.Terminate(ctx, signal) } @@ -221,7 +221,7 @@ func (e *Environment) WaitForStop(ctx context.Context, duration time.Duration, t doTermination := func(s string) error { e.log().WithField("step", s).WithField("duration", duration).Warn("container stop did not complete in time, terminating process...") - return e.Terminate(ctx, os.Kill) + return e.Terminate(ctx, "SIGKILL") } // We pass through the timed context for this stop action so that if one of the @@ -266,7 +266,7 @@ func (e *Environment) WaitForStop(ctx context.Context, duration time.Duration, t } // Terminate forcefully terminates the container using the signal provided. -func (e *Environment) Terminate(ctx context.Context, signal os.Signal) error { +func (e *Environment) Terminate(ctx context.Context, signal string) error { c, err := e.ContainerInspect(ctx) if err != nil { // Treat missing containers as an okay error state, means it is obviously @@ -289,12 +289,32 @@ func (e *Environment) Terminate(ctx context.Context, signal os.Signal) error { return nil } - // We set it to stopping than offline to prevent crash detection from being triggered. + // Timeout (optional) is the timeout (in seconds) to wait for the container + // to stop gracefully before forcibly terminating it with SIGKILL. + // + // - Use nil to use the default timeout (10 seconds). + // - Use '-1' to wait indefinitely. + // - Use '0' to not wait for the container to exit gracefully, and + // immediately proceeds to forcibly terminating the container. + // - Other positive values are used as timeout (in seconds). + var noWaitTimeout int + + // For every signal wait at max 10 seconds before SIGKILL is send (server did not stop in time) + // for SIGKILL just kill it without waiting + switch signal { + case "SIGKILL": + noWaitTimeout = 0 + default: + noWaitTimeout = 10 + } + + // We set it to stopping then offline to prevent crash detection from being triggered. e.SetState(environment.ProcessStoppingState) - sig := strings.TrimSuffix(strings.TrimPrefix(signal.String(), "signal "), "ed") - if err := e.client.ContainerKill(ctx, e.Id, sig); err != nil && !client.IsErrNotFound(err) { + + if err := e.client.ContainerStop(ctx, e.Id, container.StopOptions{Timeout: &noWaitTimeout, Signal: signal}); err != nil && !client.IsErrNotFound(err) { return errors.WithStack(err) } + e.SetState(environment.ProcessOfflineState) return nil diff --git a/environment/environment.go b/environment/environment.go index 4394108f..ba6b7dd8 100644 --- a/environment/environment.go +++ b/environment/environment.go @@ -2,7 +2,6 @@ package environment import ( "context" - "os" "time" "github.com/pelican-dev/wings/events" @@ -72,7 +71,7 @@ type ProcessEnvironment interface { // Terminate stops a running server instance using the provided signal. This function // is a no-op if the server is already stopped. - Terminate(ctx context.Context, signal os.Signal) error + Terminate(ctx context.Context, signal string) error // Destroys the environment removing any containers that were created (in Docker // environments at least). diff --git a/server/power.go b/server/power.go index 62171eb6..c8a15069 100644 --- a/server/power.go +++ b/server/power.go @@ -3,7 +3,6 @@ package server import ( "context" "fmt" - "os" "time" "emperror.dev/errors" @@ -161,7 +160,7 @@ func (s *Server) HandlePowerAction(action PowerAction, waitSeconds ...int) error return s.Environment.Start(s.Context()) case PowerActionTerminate: - return s.Environment.Terminate(s.Context(), os.Kill) + return s.Environment.Terminate(s.Context(), "SIGKILL") } return errors.New("attempting to handle unknown power action") diff --git a/server/server.go b/server/server.go index d4ab599e..0449eb1e 100644 --- a/server/server.go +++ b/server/server.go @@ -365,14 +365,6 @@ func (s *Server) OnStateChange() { s.Events().Publish(StatusEvent, st) } - // Push status update to Panel - sc := remote.ServerStateChange{prevState, st} - s.Log().WithField("state_change", sc).Debug("pushing server status change to panel") - err := s.client.PushServerStateChange(context.Background(), s.ID(), sc) - if err != nil { - s.Log().WithField("error", err).Error("error pushing server status change to panel") - } - // Reset the resource usage to 0 when the process fully stops so that all the UI // views in the Panel correctly display 0. if st == environment.ProcessOfflineState { @@ -402,6 +394,14 @@ func (s *Server) OnStateChange() { } }(s) } + + // Push status update to Panel + sc := remote.ServerStateChange{PrevState: prevState, NewState: st} + s.Log().WithField("state_change", sc).Debug("pushing server status change to panel") + err := s.client.PushServerStateChange(context.Background(), s.ID(), sc) + if err != nil { + s.Log().WithField("error", err).Error("error pushing server status change to panel") + } } // IsRunning determines if the server state is running or not. This is different