Skip to content

Commit

Permalink
Auto remove lockfile after unclean shutdown
Browse files Browse the repository at this point in the history
  • Loading branch information
agouin committed Nov 30, 2023
1 parent a1b5449 commit 41eedbc
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 30 deletions.
17 changes: 12 additions & 5 deletions signer/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
cometservice "github.com/cometbft/cometbft/libs/service"
)

func RequireNotRunning(pidFilePath string) error {
func RequireNotRunning(log cometlog.Logger, pidFilePath string) error {
if _, err := os.Stat(pidFilePath); err != nil {
if os.IsNotExist(err) {
// lock file does not exist, can continue starting daemon
Expand Down Expand Up @@ -41,17 +41,24 @@ func RequireNotRunning(pidFilePath string) error {

process, err := os.FindProcess(int(pid))
if err != nil {
return fmt.Errorf(`unclean shutdown detected. PID file exists at %s but PID %d can not be found.
manual deletion of PID file required. %w`, pidFilePath, pid, err)
return fmt.Errorf("error checking pid %d: %w", pid, err)
}

err = process.Signal(syscall.Signal(0))
if err == nil {
return fmt.Errorf("horcrux is already running on PID: %d", pid)
}
if errors.Is(err, os.ErrProcessDone) {
return fmt.Errorf(`unclean shutdown detected. PID file exists at %s but PID %d is not running.
manual deletion of PID file required`, pidFilePath, pid)
log.Error(
"Unclean shutdown detected. PID file exists at but process with that ID cannot be found. Removing lock file",
"pid", pid,
"pid_file", pidFilePath,
"error", err,
)
if err := os.Remove(pidFilePath); err != nil {
return fmt.Errorf("failed to delete pid file %s: %w", pidFilePath, err)
}
return nil
}

errno, ok := err.(syscall.Errno)
Expand Down
45 changes: 20 additions & 25 deletions signer/services_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -72,7 +71,7 @@ func TestIsRunning(t *testing.T) {
pidBz, err := os.ReadFile(pidFilePath)
require.NoError(t, err)

err = signer.RequireNotRunning(pidFilePath)
err = signer.RequireNotRunning(cometlog.NewNopLogger(), pidFilePath)
expectedErrorMsg := fmt.Sprintf("horcrux is already running on PID: %s", strings.TrimSpace(string(pidBz)))
require.EqualError(t, err, expectedErrorMsg)
}
Expand All @@ -81,20 +80,26 @@ func TestIsNotRunning(t *testing.T) {
homeDir := t.TempDir()
pidFilePath := filepath.Join(homeDir, "horcrux.pid")

err := signer.RequireNotRunning(pidFilePath)
err := signer.RequireNotRunning(cometlog.NewNopLogger(), pidFilePath)
require.NoError(t, err)
}

func getNonExistentPid() (int, error) {
func maxPid() int {
const defaultMaxPid = 100000
maxPidBytes, err := os.ReadFile("/proc/sys/kernel/pid_max")
if err != nil {
return -1, err
return defaultMaxPid
}
maxPid, err := strconv.ParseUint(strings.TrimSpace(string(maxPidBytes)), 10, 64)
maxPid, err := strconv.ParseInt(strings.TrimSpace(string(maxPidBytes)), 10, 32)
if err != nil {
return -1, err
return defaultMaxPid
}
for pid := 1; pid <= int(maxPid); pid++ {
return int(maxPid)
}

func getUnusedPid() (int, error) {
max := maxPid()
for pid := 1; pid <= max; pid++ {
process, err := os.FindProcess(pid)
if err != nil {
continue
Expand All @@ -106,26 +111,15 @@ func getNonExistentPid() (int, error) {
if errors.Is(err, os.ErrProcessDone) {
return pid, nil
}
errno, ok := err.(syscall.Errno)
if !ok {
continue
}
if errno == syscall.ESRCH {
return pid, nil
}
}
return -1, errors.New("could not find unused PID")
}

func TestIsRunningNonExistentPid(t *testing.T) {
if runtime.GOOS != "linux" {
t.Skip("test only valid on Linux")
}

homeDir := t.TempDir()
pidFilePath := filepath.Join(homeDir, "horcrux.pid")

pid, err := getNonExistentPid()
pid, err := getUnusedPid()
require.NoError(t, err)

err = os.WriteFile(
Expand All @@ -135,10 +129,11 @@ func TestIsRunningNonExistentPid(t *testing.T) {
)
require.NoError(t, err, "error writing pid file")

err = signer.RequireNotRunning(pidFilePath)
expectedErrorMsg := fmt.Sprintf(`unclean shutdown detected. PID file exists at %s but PID %d is not running.
manual deletion of PID file required`, pidFilePath, pid)
require.EqualError(t, err, expectedErrorMsg)
err = signer.RequireNotRunning(cometlog.NewNopLogger(), pidFilePath)
require.Nil(t, err)

_, err = os.Stat(pidFilePath)
require.ErrorIs(t, err, os.ErrNotExist)
}

func TestConcurrentStart(t *testing.T) {
Expand Down Expand Up @@ -216,7 +211,7 @@ func TestIsRunningAndWaitForService(t *testing.T) {
}
panicFunction := func() {
defer recoverFromPanic()
err = signer.RequireNotRunning(pidFilePath)
err = signer.RequireNotRunning(cometlog.NewNopLogger(), pidFilePath)
}
go panicFunction()
wg.Wait()
Expand Down

0 comments on commit 41eedbc

Please sign in to comment.