forked from stellar/stellar-cli
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
soroban-rpc: add generic panic handling (stellar#856)
* add generic panic handling. * update * rename * update per cr
- Loading branch information
1 parent
7b48b16
commit 3758c2e
Showing
4 changed files
with
230 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
package util | ||
|
||
import ( | ||
"fmt" | ||
"os" | ||
"reflect" | ||
"runtime" | ||
"runtime/debug" | ||
"strings" | ||
|
||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/stellar/go/support/log" | ||
) | ||
|
||
var UnrecoverablePanicGroup = panicGroup{ | ||
logPanicsToStdErr: true, | ||
exitProcessOnPanic: true, | ||
} | ||
|
||
var RecoverablePanicGroup = panicGroup{ | ||
logPanicsToStdErr: true, | ||
exitProcessOnPanic: false, | ||
} | ||
|
||
type panicGroup struct { | ||
log *log.Entry | ||
logPanicsToStdErr bool | ||
exitProcessOnPanic bool | ||
panicsCounter prometheus.Counter | ||
} | ||
|
||
func (pg *panicGroup) Log(log *log.Entry) *panicGroup { | ||
return &panicGroup{ | ||
log: log, | ||
logPanicsToStdErr: pg.logPanicsToStdErr, | ||
exitProcessOnPanic: pg.exitProcessOnPanic, | ||
panicsCounter: pg.panicsCounter, | ||
} | ||
} | ||
|
||
func (pg *panicGroup) Counter(counter prometheus.Counter) *panicGroup { | ||
return &panicGroup{ | ||
log: pg.log, | ||
logPanicsToStdErr: pg.logPanicsToStdErr, | ||
exitProcessOnPanic: pg.exitProcessOnPanic, | ||
panicsCounter: counter, | ||
} | ||
} | ||
|
||
// panicGroup give us the ability to spin a goroutine, with clear upfront definitions on what should be done in the | ||
// case of an internal panic. | ||
func (pg *panicGroup) Go(fn func()) { | ||
go func() { | ||
defer pg.recoverRoutine(fn) | ||
fn() | ||
}() | ||
} | ||
|
||
func (pg *panicGroup) recoverRoutine(fn func()) { | ||
recoverRes := recover() | ||
if recoverRes == nil { | ||
return | ||
} | ||
var cs []string | ||
if pg.log != nil { | ||
cs = getPanicCallStack(fn) | ||
for _, line := range cs { | ||
pg.log.Warn(line) | ||
} | ||
} | ||
if pg.logPanicsToStdErr { | ||
if len(cs) == 0 { | ||
cs = getPanicCallStack(fn) | ||
} | ||
for _, line := range cs { | ||
fmt.Fprintln(os.Stderr, line) | ||
} | ||
} | ||
|
||
if pg.panicsCounter != nil { | ||
pg.panicsCounter.Inc() | ||
} | ||
if pg.exitProcessOnPanic { | ||
os.Exit(1) | ||
} | ||
} | ||
|
||
// getPanicCallStack builds a line-by-line description of the current
// goroutine's call stack. The first line names fn, the root function of the
// goroutine; the following lines are the entries reported by debug.Stack,
// without its leading entries and cut right after the first entry that
// mentions (*panicGroup).Go.
func getPanicCallStack(fn func()) (outCallStack []string) {
	rootName := runtime.FuncForPC(reflect.ValueOf(fn).Pointer()).Name()
	outCallStack = []string{fmt.Sprintf("panicing root function '%s'", rootName)}

	// When called from within recoverRoutine, debug.Stack() still contains the
	// frames of the place where the panic was raised.
	isSeparator := func(r rune) bool { return r == '\n' || r == '\t' }
	entries := strings.FieldsFunc(string(debug.Stack()), isSeparator)

	// The first 5 entries belong to the goroutine header and the
	// "debug.Stack()" bookkeeping, which are not useful to the reader.
	const skippedEntries = 5
	if len(entries) <= skippedEntries {
		return outCallStack
	}
	for _, entry := range entries[skippedEntries:] {
		outCallStack = append(outCallStack, entry)
		// The panicGroup entry is the bottom of the interesting part; stop there.
		if strings.Contains(entry, "(*panicGroup).Go") {
			break
		}
	}
	return outCallStack
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
package util | ||
|
||
import ( | ||
"os" | ||
"sync" | ||
"testing" | ||
"time" | ||
|
||
"github.com/sirupsen/logrus" | ||
"github.com/stellar/go/support/log" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestTrivialPanicGroup(t *testing.T) { | ||
ch := make(chan int) | ||
|
||
panicGroup := panicGroup{} | ||
panicGroup.Go(func() { ch <- 1 }) | ||
|
||
<-ch | ||
} | ||
|
||
type TestLogsCounter struct { | ||
entry *log.Entry | ||
mu sync.Mutex | ||
writtenLogEntries [logrus.TraceLevel + 1]int | ||
} | ||
|
||
func makeTestLogCounter() *TestLogsCounter { | ||
out := &TestLogsCounter{ | ||
entry: log.New(), | ||
} | ||
out.entry.AddHook(out) | ||
out.entry.SetLevel(logrus.DebugLevel) | ||
return out | ||
} | ||
func (te *TestLogsCounter) Entry() *log.Entry { | ||
return te.entry | ||
} | ||
func (te *TestLogsCounter) Levels() []logrus.Level { | ||
return []logrus.Level{logrus.PanicLevel, logrus.FatalLevel, logrus.ErrorLevel, logrus.WarnLevel, logrus.InfoLevel, logrus.DebugLevel, logrus.TraceLevel} | ||
} | ||
func (te *TestLogsCounter) Fire(e *logrus.Entry) error { | ||
te.mu.Lock() | ||
defer te.mu.Unlock() | ||
te.writtenLogEntries[e.Level]++ | ||
return nil | ||
} | ||
func (te *TestLogsCounter) GetLevel(i int) int { | ||
te.mu.Lock() | ||
defer te.mu.Unlock() | ||
return te.writtenLogEntries[i] | ||
} | ||
|
||
// PanicingFunctionA stores zero through w. Called with a nil pointer it
// panics with a nil dereference, which the panic group tests use as their
// panic source.
func PanicingFunctionA(w *int) {
	const zero = 0
	*w = zero
}
|
||
func IndirectPanicingFunctionB() { | ||
PanicingFunctionA(nil) | ||
} | ||
|
||
func IndirectPanicingFunctionC() { | ||
IndirectPanicingFunctionB() | ||
} | ||
|
||
func TestPanicGroupLog(t *testing.T) { | ||
logCounter := makeTestLogCounter() | ||
panicGroup := panicGroup{ | ||
log: logCounter.Entry(), | ||
} | ||
panicGroup.Go(IndirectPanicingFunctionC) | ||
// wait until we get all the log entries. | ||
waitStarted := time.Now() | ||
for time.Since(waitStarted) < 5*time.Second { | ||
warningCount := logCounter.GetLevel(3) | ||
if warningCount >= 10 { | ||
return | ||
} | ||
time.Sleep(1 * time.Millisecond) | ||
} | ||
t.FailNow() | ||
} | ||
|
||
func TestPanicGroupStdErr(t *testing.T) { | ||
tmpFile, err := os.CreateTemp("", "TestPanicGroupStdErr") | ||
require.NoError(t, err) | ||
defaultStdErr := os.Stderr | ||
os.Stderr = tmpFile | ||
defer func() { | ||
os.Stderr = defaultStdErr | ||
tmpFile.Close() | ||
os.Remove(tmpFile.Name()) | ||
}() | ||
|
||
panicGroup := panicGroup{ | ||
logPanicsToStdErr: true, | ||
} | ||
panicGroup.Go(IndirectPanicingFunctionC) | ||
// wait until we get all the log entries. | ||
waitStarted := time.Now() | ||
for time.Since(waitStarted) < 5*time.Second { | ||
outErrBytes, err := os.ReadFile(tmpFile.Name()) | ||
require.NoError(t, err) | ||
if len(outErrBytes) >= 100 { | ||
return | ||
} | ||
time.Sleep(1 * time.Millisecond) | ||
} | ||
t.FailNow() | ||
} |