From 951e7f35c5c71cfce85dbac878621d790e298422 Mon Sep 17 00:00:00 2001 From: Oleg Kovalov Date: Mon, 1 Jul 2024 17:58:27 +0200 Subject: [PATCH 1/3] add log and os.exit --- store/store.go | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/store/store.go b/store/store.go index 657a22c7..06b3b17e 100644 --- a/store/store.go +++ b/store/store.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "os" "sync/atomic" "time" @@ -397,14 +398,26 @@ func (s *Store[H]) flushLoop() { startTime := time.Now() toFlush := s.pending.GetAll() - err := s.flush(ctx, toFlush...) - if err != nil { + + const flushRetries = 3 + for i := 0; i <= flushRetries; i++ { + err := s.flush(ctx, toFlush...) + if err == nil { + break + } + from, to := toFlush[0].Height(), toFlush[len(toFlush)-1].Height() - // TODO(@Wondertan): Should this be a fatal error case with os.Exit? - log.Errorw("writing header batch", "from", from, "to", to, "err", err) + if i == flushRetries { + log.Fatalw("writing header batch", "from", from, "to", to, "err", err) + os.Exit(1) + return + } + log.Errorw("writing header batch", "try", i+1, "from", from, "to", to, "err", err) s.metrics.flush(ctx, time.Since(startTime), s.pending.Len(), true) - continue + sleep := 10 * time.Duration(i+1) * time.Millisecond + time.Sleep(sleep) } + s.metrics.flush(ctx, time.Since(startTime), s.pending.Len(), false) // reset pending s.pending.Reset() From 570a528789bda7c778b9df56a2213eb52947f0ce Mon Sep 17 00:00:00 2001 From: Oleg Kovalov Date: Mon, 1 Jul 2024 17:58:46 +0200 Subject: [PATCH 2/3] fix --- store/store.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/store/store.go b/store/store.go index 06b3b17e..aea81c13 100644 --- a/store/store.go +++ b/store/store.go @@ -408,7 +408,7 @@ func (s *Store[H]) flushLoop() { from, to := toFlush[0].Height(), toFlush[len(toFlush)-1].Height() if i == flushRetries { - log.Fatalw("writing header batch", "from", from, "to", to, "err", err) + log.Errorw("writing header batch", "from", from, "to", to, "err", err) os.Exit(1) return } From 6d75a2bbe287cb3fa2221b1365248aec11fe73de Mon Sep 17 00:00:00 2001 From: Oleg Kovalov Date: Tue, 23 Jul 2024 10:31:11 +0200 Subject: [PATCH 3/3] fix --- store/store.go | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/store/store.go b/store/store.go index aea81c13..6ea126ff 100644 --- a/store/store.go +++ b/store/store.go @@ -4,7 +4,6 @@ import ( "context" "errors" "fmt" - "os" "sync/atomic" "time" @@ -399,22 +398,18 @@ func (s *Store[H]) flushLoop() { startTime := time.Now() toFlush := s.pending.GetAll() - const flushRetries = 3 - for i := 0; i <= flushRetries; i++ { + for i := 0; ; i++ { err := s.flush(ctx, toFlush...) if err == nil { break } from, to := toFlush[0].Height(), toFlush[len(toFlush)-1].Height() - if i == flushRetries { - log.Errorw("writing header batch", "from", from, "to", to, "err", err) - os.Exit(1) - return - } log.Errorw("writing header batch", "try", i+1, "from", from, "to", to, "err", err) s.metrics.flush(ctx, time.Since(startTime), s.pending.Len(), true) - sleep := 10 * time.Duration(i+1) * time.Millisecond + + const maxRetrySleep = time.Second + sleep := min(10*time.Duration(i+1)*time.Millisecond, maxRetrySleep) time.Sleep(sleep) }