diff --git a/btcclient/client.go b/btcclient/client.go index 8da20dcd..91e9e4a0 100644 --- a/btcclient/client.go +++ b/btcclient/client.go @@ -31,6 +31,7 @@ type Client struct { // retry attributes retrySleepTime time.Duration maxRetrySleepTime time.Duration + maxRetryTimes uint } func (c *Client) GetTipBlockVerbose() (*btcjson.GetBlockVerboseResult, error) { diff --git a/btcclient/client_wallet.go b/btcclient/client_wallet.go index fef4835d..83f1b9e8 100644 --- a/btcclient/client_wallet.go +++ b/btcclient/client_wallet.go @@ -19,22 +19,25 @@ import ( // used by vigilant submitter // a wallet is essentially a BTC client // that connects to the btcWallet daemon -func NewWallet(cfg *config.BTCConfig, parentLogger *zap.Logger) (*Client, error) { - params, err := netparams.GetBTCParams(cfg.NetParams) +func NewWallet(cfg *config.Config, parentLogger *zap.Logger) (*Client, error) { + params, err := netparams.GetBTCParams(cfg.BTC.NetParams) if err != nil { return nil, err } wallet := &Client{} - wallet.cfg = cfg + wallet.cfg = &cfg.BTC wallet.params = params wallet.logger = parentLogger.With(zap.String("module", "btcclient_wallet")).Sugar() + wallet.retrySleepTime = cfg.Common.RetrySleepTime + wallet.maxRetryTimes = cfg.Common.MaxRetryTimes + wallet.maxRetrySleepTime = cfg.Common.MaxRetrySleepTime connCfg := &rpcclient.ConnConfig{ // this will work with node loaded with multiple wallets - Host: cfg.Endpoint + "/wallet/" + cfg.WalletName, + Host: cfg.BTC.Endpoint + "/wallet/" + cfg.BTC.WalletName, HTTPPostMode: true, - User: cfg.Username, - Pass: cfg.Password, + User: cfg.BTC.Username, + Pass: cfg.BTC.Password, DisableTLS: true, } diff --git a/btcclient/query.go b/btcclient/query.go index c62b4cf3..512cb577 100644 --- a/btcclient/query.go +++ b/btcclient/query.go @@ -3,7 +3,7 @@ package btcclient import ( "fmt" - "github.com/babylonlabs-io/babylon/types/retry" + "github.com/avast/retry-go/v4" "github.com/btcsuite/btcd/btcjson" "github.com/btcsuite/btcd/chaincfg/chainhash" "github.com/btcsuite/btcd/wire" @@ -66,13 +66,17 @@ func (c *Client) getBestBlockHashWithRetry() (*chainhash.Hash, error) { err error ) - if err := retry.Do(c.retrySleepTime, c.maxRetrySleepTime, func() error { + if err := retry.Do(func() error { blockHash, err = c.GetBestBlockHash() if err != nil { return err } return nil - }); err != nil { + }, + retry.Delay(c.retrySleepTime), + retry.MaxDelay(c.maxRetrySleepTime), + retry.Attempts(c.maxRetryTimes), + ); err != nil { c.logger.Debug( "failed to query the best block hash", zap.Error(err)) } @@ -86,13 +90,17 @@ func (c *Client) getBlockHashWithRetry(height uint64) (*chainhash.Hash, error) { err error ) - if err := retry.Do(c.retrySleepTime, c.maxRetrySleepTime, func() error { + if err := retry.Do(func() error { blockHash, err = c.GetBlockHash(int64(height)) if err != nil { return err } return nil - }); err != nil { + }, + retry.Delay(c.retrySleepTime), + retry.MaxDelay(c.maxRetrySleepTime), + retry.Attempts(c.maxRetryTimes), + ); err != nil { c.logger.Debug( "failed to query the block hash", zap.Uint64("height", height), zap.Error(err)) } @@ -106,13 +114,17 @@ func (c *Client) getBlockWithRetry(hash *chainhash.Hash) (*wire.MsgBlock, error) err error ) - if err := retry.Do(c.retrySleepTime, c.maxRetrySleepTime, func() error { + if err := retry.Do(func() error { block, err = c.GetBlock(hash) if err != nil { return err } return nil - }); err != nil { + }, + retry.Delay(c.retrySleepTime), + retry.MaxDelay(c.maxRetrySleepTime), + retry.Attempts(c.maxRetryTimes), + ); err != nil { c.logger.Debug( "failed to query the block", zap.String("hash", hash.String()), zap.Error(err)) } @@ -126,13 +138,17 @@ func (c *Client) getBlockVerboseWithRetry(hash *chainhash.Hash) (*btcjson.GetBlo err error ) - if err := retry.Do(c.retrySleepTime, c.maxRetrySleepTime, func() error { + if err := retry.Do(func() error { blockVerbose, err = c.GetBlockVerbose(hash) if err != nil { return err } return nil - }); err != nil { + }, + retry.Delay(c.retrySleepTime), + retry.MaxDelay(c.maxRetrySleepTime), + retry.Attempts(c.maxRetryTimes), + ); err != nil { c.logger.Debug( "failed to query the block verbose", zap.String("hash", hash.String()), zap.Error(err)) } diff --git a/btcstaking-tracker/atomicslasher/atomic_slasher.go b/btcstaking-tracker/atomicslasher/atomic_slasher.go index 3a17e96b..95108251 100644 --- a/btcstaking-tracker/atomicslasher/atomic_slasher.go +++ b/btcstaking-tracker/atomicslasher/atomic_slasher.go @@ -47,6 +47,7 @@ func New( parentLogger *zap.Logger, retrySleepTime time.Duration, maxRetrySleepTime time.Duration, + maxRetryTimes uint, btcClient btcclient.BTCClient, btcNotifier notifier.ChainNotifier, bbnClient BabylonClient, @@ -54,7 +55,7 @@ func New( metrics *metrics.AtomicSlasherMetrics, ) *AtomicSlasher { logger := parentLogger.With(zap.String("module", "atomic_slasher")) - bbnAdapter := NewBabylonAdapter(logger, cfg, retrySleepTime, maxRetrySleepTime, bbnClient) + bbnAdapter := NewBabylonAdapter(logger, cfg, retrySleepTime, maxRetrySleepTime, maxRetryTimes, bbnClient) return &AtomicSlasher{ quit: make(chan struct{}), cfg: cfg, diff --git a/btcstaking-tracker/atomicslasher/babylon_adapter.go b/btcstaking-tracker/atomicslasher/babylon_adapter.go index 50bd341c..0365c408 100644 --- a/btcstaking-tracker/atomicslasher/babylon_adapter.go +++ b/btcstaking-tracker/atomicslasher/babylon_adapter.go @@ -20,6 +20,7 @@ type BabylonAdapter struct { cfg *config.BTCStakingTrackerConfig retrySleepTime time.Duration maxRetrySleepTime time.Duration + maxRetryTimes uint bbnClient BabylonClient } @@ -28,6 +29,7 @@ func NewBabylonAdapter( cfg *config.BTCStakingTrackerConfig, retrySleepTime time.Duration, maxRetrySleepTime time.Duration, + maxRetryTimes uint, bbnClient BabylonClient, ) *BabylonAdapter { return &BabylonAdapter{ @@ -35,6 +37,7 @@ func NewBabylonAdapter( cfg: cfg, retrySleepTime: retrySleepTime, maxRetrySleepTime: maxRetrySleepTime, + maxRetryTimes: maxRetryTimes, bbnClient: bbnClient, } } @@ -53,6 +56,7 @@ func (ba *BabylonAdapter) BTCStakingParams(ctx context.Context, version uint32) retry.Context(ctx), retry.Delay(ba.retrySleepTime), retry.MaxDelay(ba.maxRetrySleepTime), + retry.Attempts(ba.maxRetryTimes), ) return bsParams, err @@ -72,6 +76,7 @@ func (ba *BabylonAdapter) BTCDelegation(ctx context.Context, stakingTxHashHex st retry.Context(ctx), retry.Delay(ba.retrySleepTime), retry.MaxDelay(ba.maxRetrySleepTime), + retry.Attempts(ba.maxRetryTimes), ) return resp, err diff --git a/btcstaking-tracker/btcslasher/bootstrapping_test.go b/btcstaking-tracker/btcslasher/bootstrapping_test.go index 57eb7ff2..0ff2acf2 100644 --- a/btcstaking-tracker/btcslasher/bootstrapping_test.go +++ b/btcstaking-tracker/btcslasher/bootstrapping_test.go @@ -61,7 +61,17 @@ func FuzzSlasher_Bootstrapping(f *testing.F) { logger, err := config.NewRootLogger("auto", "debug") require.NoError(t, err) slashedFPSKChan := make(chan *btcec.PrivateKey, 100) - btcSlasher, err := btcslasher.New(logger, mockBTCClient, mockBabylonQuerier, &chaincfg.SimNetParams, commonCfg.RetrySleepTime, commonCfg.MaxRetrySleepTime, slashedFPSKChan, metrics.NewBTCStakingTrackerMetrics().SlasherMetrics) + btcSlasher, err := btcslasher.New( + logger, + mockBTCClient, + mockBabylonQuerier, + &chaincfg.SimNetParams, + commonCfg.RetrySleepTime, + commonCfg.MaxRetrySleepTime, + commonCfg.MaxRetryTimes, + slashedFPSKChan, + metrics.NewBTCStakingTrackerMetrics().SlasherMetrics, + ) require.NoError(t, err) // slashing address diff --git a/btcstaking-tracker/btcslasher/slasher.go b/btcstaking-tracker/btcslasher/slasher.go index 0ba37bf3..a68b13ed 100644 --- a/btcstaking-tracker/btcslasher/slasher.go +++ b/btcstaking-tracker/btcslasher/slasher.go @@ -35,7 +35,7 @@ type BTCSlasher struct { btcFinalizationTimeout uint64 retrySleepTime time.Duration maxRetrySleepTime time.Duration - + maxRetryTimes uint // channel for finality signature messages, which might include // equivocation evidences finalitySigChan <-chan coretypes.ResultEvent @@ -59,6 +59,7 @@ func New( netParams *chaincfg.Params, retrySleepTime time.Duration, maxRetrySleepTime time.Duration, + maxRetryTimes uint, slashedFPSKChan chan *btcec.PrivateKey, metrics *metrics.SlasherMetrics, ) (*BTCSlasher, error) { @@ -71,6 +72,7 @@ func New( netParams: netParams, retrySleepTime: retrySleepTime, maxRetrySleepTime: maxRetrySleepTime, + maxRetryTimes: maxRetryTimes, slashedFPSKChan: slashedFPSKChan, slashResultChan: make(chan *SlashResult, 1000), quit: make(chan struct{}), diff --git a/btcstaking-tracker/btcslasher/slasher_test.go b/btcstaking-tracker/btcslasher/slasher_test.go index 9debfe61..d5cc1347 100644 --- a/btcstaking-tracker/btcslasher/slasher_test.go +++ b/btcstaking-tracker/btcslasher/slasher_test.go @@ -71,7 +71,17 @@ func FuzzSlasher(f *testing.F) { logger, err := config.NewRootLogger("auto", "debug") require.NoError(t, err) slashedFPSKChan := make(chan *btcec.PrivateKey, 100) - btcSlasher, err := btcslasher.New(logger, mockBTCClient, mockBabylonQuerier, &chaincfg.SimNetParams, commonCfg.RetrySleepTime, commonCfg.MaxRetrySleepTime, slashedFPSKChan, metrics.NewBTCStakingTrackerMetrics().SlasherMetrics) + btcSlasher, err := btcslasher.New( + logger, + mockBTCClient, + mockBabylonQuerier, + &chaincfg.SimNetParams, + commonCfg.RetrySleepTime, + commonCfg.MaxRetrySleepTime, + commonCfg.MaxRetryTimes, + slashedFPSKChan, + metrics.NewBTCStakingTrackerMetrics().SlasherMetrics, + ) require.NoError(t, err) err = btcSlasher.LoadParams() require.NoError(t, err) diff --git a/btcstaking-tracker/btcslasher/slasher_utils.go b/btcstaking-tracker/btcslasher/slasher_utils.go index 08e77bab..e8a38827 100644 --- a/btcstaking-tracker/btcslasher/slasher_utils.go +++ b/btcstaking-tracker/btcslasher/slasher_utils.go @@ -66,6 +66,7 @@ func (bs *BTCSlasher) slashBTCDelegation( retry.Context(ctx), retry.Delay(bs.retrySleepTime), retry.MaxDelay(bs.maxRetrySleepTime), + retry.Attempts(bs.maxRetryTimes), ) slashRes := &SlashResult{ diff --git a/btcstaking-tracker/tracker.go b/btcstaking-tracker/tracker.go index 0894619f..ca6a696b 100644 --- a/btcstaking-tracker/tracker.go +++ b/btcstaking-tracker/tracker.go @@ -87,6 +87,7 @@ func NewBTCSTakingTracker( btcParams, commonCfg.RetrySleepTime, commonCfg.MaxRetrySleepTime, + commonCfg.MaxRetryTimes, slashedFPSKChan, metrics.SlasherMetrics, ) @@ -100,6 +101,7 @@ func NewBTCSTakingTracker( logger, commonCfg.RetrySleepTime, commonCfg.MaxRetrySleepTime, + commonCfg.MaxRetryTimes, btcClient, btcNotifier, bbnClient, diff --git a/cmd/vigilante/cmd/btcstaking_tracker.go b/cmd/vigilante/cmd/btcstaking_tracker.go index 1874b753..3f650e44 100644 --- a/cmd/vigilante/cmd/btcstaking_tracker.go +++ b/cmd/vigilante/cmd/btcstaking_tracker.go @@ -60,7 +60,7 @@ func GetBTCStakingTracker() *cobra.Command { // create BTC client and connect to BTC server // Note that monitor needs to subscribe to new BTC blocks - btcClient, err := btcclient.NewWallet(&cfg.BTC, rootLogger) + btcClient, err := btcclient.NewWallet(&cfg, rootLogger) if err != nil { panic(fmt.Errorf("failed to open BTC client: %w", err)) diff --git a/cmd/vigilante/cmd/monitor.go b/cmd/vigilante/cmd/monitor.go index fe2b7423..8a5b2197 100644 --- a/cmd/vigilante/cmd/monitor.go +++ b/cmd/vigilante/cmd/monitor.go @@ -61,7 +61,7 @@ func GetMonitorCmd() *cobra.Command { } // create BTC client and connect to BTC server - btcClient, err = btcclient.NewWallet(&cfg.BTC, rootLogger) + btcClient, err = btcclient.NewWallet(&cfg, rootLogger) if err != nil { panic(fmt.Errorf("failed to open BTC client: %w", err)) } diff --git a/cmd/vigilante/cmd/reporter.go b/cmd/vigilante/cmd/reporter.go index 0d763d18..ec4b1596 100644 --- a/cmd/vigilante/cmd/reporter.go +++ b/cmd/vigilante/cmd/reporter.go @@ -47,7 +47,7 @@ func GetReporterCmd() *cobra.Command { // create BTC client and connect to BTC server // Note that vigilant reporter needs to subscribe to new BTC blocks - btcClient, err = btcclient.NewWallet(&cfg.BTC, rootLogger) + btcClient, err = btcclient.NewWallet(&cfg, rootLogger) if err != nil { panic(fmt.Errorf("failed to open BTC client: %w", err)) } diff --git a/cmd/vigilante/cmd/submitter.go b/cmd/vigilante/cmd/submitter.go index c0be7412..bc97ae41 100644 --- a/cmd/vigilante/cmd/submitter.go +++ b/cmd/vigilante/cmd/submitter.go @@ -34,7 +34,7 @@ func GetSubmitterCmd() *cobra.Command { } // create BTC wallet and connect to BTC server - btcWallet, err := btcclient.NewWallet(&cfg.BTC, rootLogger) + btcWallet, err := btcclient.NewWallet(&cfg, rootLogger) if err != nil { panic(fmt.Errorf("failed to open BTC client: %w", err)) } @@ -71,6 +71,7 @@ func GetSubmitterCmd() *cobra.Command { submitterAddr, cfg.Common.RetrySleepTime, cfg.Common.MaxRetrySleepTime, + cfg.Common.MaxRetryTimes, submitterMetrics, ) if err != nil { diff --git a/config/common.go b/config/common.go index bc6afecc..b1c43e1b 100644 --- a/config/common.go +++ b/config/common.go @@ -10,6 +10,7 @@ import ( const ( defaultRetrySleepTime = 5 * time.Second defaultMaxRetrySleepTime = 5 * time.Minute + defaultMaxRetryTimes = 5 ) // CommonConfig defines the server's basic configuration @@ -23,6 +24,8 @@ type CommonConfig struct { // Maximum backoff interval between retries. Exponential backoff leads to interval increase. // This value is the cap of the interval, when exceeded the retries stop. MaxRetrySleepTime time.Duration `mapstructure:"max-retry-sleep-time"` + // The max number of retries in case of a failure + MaxRetryTimes uint `mapstructure:"max-retry-times"` } func isOneOf(v string, list []string) bool { @@ -60,5 +63,6 @@ func DefaultCommonConfig() CommonConfig { LogLevel: "debug", RetrySleepTime: defaultRetrySleepTime, MaxRetrySleepTime: defaultMaxRetrySleepTime, + MaxRetryTimes: defaultMaxRetryTimes, } } diff --git a/e2etest/submitter_e2e_test.go b/e2etest/submitter_e2e_test.go index 55284a8f..0968e806 100644 --- a/e2etest/submitter_e2e_test.go +++ b/e2etest/submitter_e2e_test.go @@ -62,6 +62,7 @@ func TestSubmitterSubmission(t *testing.T) { subAddr, tm.Config.Common.RetrySleepTime, tm.Config.Common.MaxRetrySleepTime, + tm.Config.Common.MaxRetryTimes, metrics.NewSubmitterMetrics(), ) @@ -135,6 +136,7 @@ func TestSubmitterSubmissionReplace(t *testing.T) { subAddr, tm.Config.Common.RetrySleepTime, tm.Config.Common.MaxRetrySleepTime, + tm.Config.Common.MaxRetryTimes, metrics.NewSubmitterMetrics(), ) diff --git a/e2etest/test_manager.go b/e2etest/test_manager.go index d5b9936e..6081b446 100644 --- a/e2etest/test_manager.go +++ b/e2etest/test_manager.go @@ -60,7 +60,7 @@ type TestManager struct { } func initBTCClientWithSubscriber(t *testing.T, cfg *config.Config) *btcclient.Client { - client, err := btcclient.NewWallet(&cfg.BTC, zap.NewNop()) + client, err := btcclient.NewWallet(cfg, zap.NewNop()) require.NoError(t, err) // let's wait until chain rpc becomes available diff --git a/go.mod b/go.mod index 71a53725..d5e72d6a 100644 --- a/go.mod +++ b/go.mod @@ -6,7 +6,7 @@ require ( cosmossdk.io/errors v1.0.1 cosmossdk.io/log v1.3.1 cosmossdk.io/math v1.3.0 - github.com/avast/retry-go/v4 v4.5.1 + github.com/avast/retry-go/v4 v4.6.0 github.com/babylonlabs-io/babylon v0.9.1 github.com/boljen/go-bitmap v0.0.0-20151001105940-23cd2fb0ce7d github.com/btcsuite/btcd v0.24.2 diff --git a/go.sum b/go.sum index aa5d1149..7ad26eaa 100644 --- a/go.sum +++ b/go.sum @@ -275,8 +275,8 @@ github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5 github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY= github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8= github.com/aryann/difflib v0.0.0-20170710044230-e206f873d14a/go.mod h1:DAHtR1m6lCRdSC2Tm3DSWRPvIPr6xNKyeHdqDQSQT+A= -github.com/avast/retry-go/v4 v4.5.1 h1:AxIx0HGi4VZ3I02jr78j5lZ3M6x1E0Ivxa6b0pUUh7o= -github.com/avast/retry-go/v4 v4.5.1/go.mod h1:/sipNsvNB3RRuT5iNcb6h73nw3IBmXJ/H3XrCQYSOpc= +github.com/avast/retry-go/v4 v4.6.0 h1:K9xNA+KeB8HHc2aWFuLb25Offp+0iVRXEvFx8IinRJA= +github.com/avast/retry-go/v4 v4.6.0/go.mod h1:gvWlPhBVsvBbLkVGDg/KwvBv0bEkCOLRRSHKIr2PyOE= github.com/aws/aws-lambda-go v1.13.3/go.mod h1:4UKl9IzQMoD+QF79YdCuzCwp8VbmG4VAQwij/eHl5CU= github.com/aws/aws-sdk-go v1.27.0/go.mod h1:KmX6BPdI08NWTb3/sm4ZGu5ShLoqVDhKgpiN924inxo= github.com/aws/aws-sdk-go v1.44.122/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo= diff --git a/monitor/liveness_checker_test.go b/monitor/liveness_checker_test.go index eb36c957..f8685306 100644 --- a/monitor/liveness_checker_test.go +++ b/monitor/liveness_checker_test.go @@ -1,20 +1,18 @@ package monitor_test import ( - "math/rand" - "testing" - bbndatagen "github.com/babylonlabs-io/babylon/testutil/datagen" bbntypes "github.com/babylonlabs-io/babylon/types" btclctypes "github.com/babylonlabs-io/babylon/x/btclightclient/types" monitortypes "github.com/babylonlabs-io/babylon/x/monitor/types" - "github.com/golang/mock/gomock" - "github.com/stretchr/testify/require" - "github.com/babylonlabs-io/vigilante/config" "github.com/babylonlabs-io/vigilante/monitor" "github.com/babylonlabs-io/vigilante/testutil/datagen" "github.com/babylonlabs-io/vigilante/types" + "github.com/golang/mock/gomock" + "github.com/stretchr/testify/require" + "math/rand" + "testing" ) func FuzzLivenessChecker(f *testing.F) { @@ -33,6 +31,7 @@ func FuzzLivenessChecker(f *testing.F) { ComCfg: &config.CommonConfig{ RetrySleepTime: 1, MaxRetrySleepTime: 0, + MaxRetryTimes: 1, }, BBNQuerier: mockBabylonClient, } diff --git a/monitor/query.go b/monitor/query.go index bcbaff76..5863d05c 100644 --- a/monitor/query.go +++ b/monitor/query.go @@ -2,8 +2,9 @@ package monitor import ( "fmt" + "github.com/avast/retry-go/v4" + "github.com/babylonlabs-io/vigilante/retrywrap" - "github.com/babylonlabs-io/babylon/types/retry" btclctypes "github.com/babylonlabs-io/babylon/x/btclightclient/types" ckpttypes "github.com/babylonlabs-io/babylon/x/checkpointing/types" epochingtypes "github.com/babylonlabs-io/babylon/x/epoching/types" @@ -61,7 +62,7 @@ func (m *Monitor) FindTipConfirmedEpoch() (uint64, error) { func (m *Monitor) queryCurrentEpochWithRetry() (*epochingtypes.QueryCurrentEpochResponse, error) { var currentEpochRes epochingtypes.QueryCurrentEpochResponse - if err := retry.Do(m.ComCfg.RetrySleepTime, m.ComCfg.MaxRetrySleepTime, func() error { + if err := retrywrap.Do(func() error { res, err := m.BBNQuerier.CurrentEpoch() if err != nil { return err @@ -69,7 +70,11 @@ func (m *Monitor) queryCurrentEpochWithRetry() (*epochingtypes.QueryCurrentEpoch currentEpochRes = *res return nil - }); err != nil { + }, + retry.Delay(m.ComCfg.RetrySleepTime), + retry.MaxDelay(m.ComCfg.MaxRetrySleepTime), + retry.Attempts(m.ComCfg.MaxRetryTimes), + ); err != nil { m.logger.Debug( "failed to query the current epoch", zap.Error(err)) @@ -82,7 +87,7 @@ func (m *Monitor) queryCurrentEpochWithRetry() (*epochingtypes.QueryCurrentEpoch func (m *Monitor) queryRawCheckpointWithRetry(epoch uint64) (*ckpttypes.QueryRawCheckpointResponse, error) { var rawCheckpointRes ckpttypes.QueryRawCheckpointResponse - if err := retry.Do(m.ComCfg.RetrySleepTime, m.ComCfg.MaxRetrySleepTime, func() error { + if err := retrywrap.Do(func() error { res, err := m.BBNQuerier.RawCheckpoint(epoch) if err != nil { return err @@ -90,7 +95,11 @@ func (m *Monitor) queryRawCheckpointWithRetry(epoch uint64) (*ckpttypes.QueryRaw rawCheckpointRes = *res return nil - }); err != nil { + }, + retry.Delay(m.ComCfg.RetrySleepTime), + retry.MaxDelay(m.ComCfg.MaxRetrySleepTime), + retry.Attempts(m.ComCfg.MaxRetryTimes), + ); err != nil { m.logger.Debug( "failed to query the raw checkpoint", zap.Error(err)) @@ -103,7 +112,7 @@ func (m *Monitor) queryRawCheckpointWithRetry(epoch uint64) (*ckpttypes.QueryRaw func (m *Monitor) queryBlsPublicKeyListWithRetry(epoch uint64) (*ckpttypes.QueryBlsPublicKeyListResponse, error) { var blsPublicKeyListRes ckpttypes.QueryBlsPublicKeyListResponse - if err := retry.Do(m.ComCfg.RetrySleepTime, m.ComCfg.MaxRetrySleepTime, func() error { + if err := retrywrap.Do(func() error { res, err := m.BBNQuerier.BlsPublicKeyList(epoch, nil) if err != nil { return err @@ -111,7 +120,11 @@ func (m *Monitor) queryBlsPublicKeyListWithRetry(epoch uint64) (*ckpttypes.Query blsPublicKeyListRes = *res return nil - }); err != nil { + }, + retry.Delay(m.ComCfg.RetrySleepTime), + retry.MaxDelay(m.ComCfg.MaxRetrySleepTime), + retry.Attempts(m.ComCfg.MaxRetryTimes), + ); err != nil { m.logger.Debug( "failed to query the BLS public key list", zap.Error(err)) @@ -124,7 +137,7 @@ func (m *Monitor) queryBlsPublicKeyListWithRetry(epoch uint64) (*ckpttypes.Query func (m *Monitor) queryEndedEpochBTCHeightWithRetry(epoch uint64) (*monitortypes.QueryEndedEpochBtcHeightResponse, error) { var endedEpochBTCHeightRes monitortypes.QueryEndedEpochBtcHeightResponse - if err := retry.Do(m.ComCfg.RetrySleepTime, m.ComCfg.MaxRetrySleepTime, func() error { + if err := retrywrap.Do(func() error { res, err := m.BBNQuerier.EndedEpochBTCHeight(epoch) if err != nil { return err @@ -132,7 +145,11 @@ func (m *Monitor) queryEndedEpochBTCHeightWithRetry(epoch uint64) (*monitortypes endedEpochBTCHeightRes = *res return nil - }); err != nil { + }, + retry.Delay(m.ComCfg.RetrySleepTime), + retry.MaxDelay(m.ComCfg.MaxRetrySleepTime), + retry.Attempts(m.ComCfg.MaxRetryTimes), + ); err != nil { m.logger.Debug( "failed to query the ended epoch BTC height", zap.Error(err)) @@ -145,7 +162,7 @@ func (m *Monitor) queryEndedEpochBTCHeightWithRetry(epoch uint64) (*monitortypes func (m *Monitor) queryReportedCheckpointBTCHeightWithRetry(hashStr string) (*monitortypes.QueryReportedCheckpointBtcHeightResponse, error) { var reportedCheckpointBtcHeightRes monitortypes.QueryReportedCheckpointBtcHeightResponse - if err := retry.Do(m.ComCfg.RetrySleepTime, m.ComCfg.MaxRetrySleepTime, func() error { + if err := retrywrap.Do(func() error { res, err := m.BBNQuerier.ReportedCheckpointBTCHeight(hashStr) if err != nil { return err @@ -153,7 +170,11 @@ func (m *Monitor) queryReportedCheckpointBTCHeightWithRetry(hashStr string) (*mo reportedCheckpointBtcHeightRes = *res return nil - }); err != nil { + }, + retry.Delay(m.ComCfg.RetrySleepTime), + retry.MaxDelay(m.ComCfg.MaxRetrySleepTime), + retry.Attempts(m.ComCfg.MaxRetryTimes), + ); err != nil { m.logger.Debug( "failed to query the reported checkpoint BTC height", zap.Error(err)) @@ -166,7 +187,7 @@ func (m *Monitor) queryReportedCheckpointBTCHeightWithRetry(hashStr string) (*mo func (m *Monitor) queryBTCHeaderChainTipWithRetry() (*btclctypes.QueryTipResponse, error) { var btcHeaderChainTipRes btclctypes.QueryTipResponse - if err := retry.Do(m.ComCfg.RetrySleepTime, m.ComCfg.MaxRetrySleepTime, func() error { + if err := retrywrap.Do(func() error { res, err := m.BBNQuerier.BTCHeaderChainTip() if err != nil { return err @@ -174,7 +195,11 @@ func (m *Monitor) queryBTCHeaderChainTipWithRetry() (*btclctypes.QueryTipRespons btcHeaderChainTipRes = *res return nil - }); err != nil { + }, + retry.Delay(m.ComCfg.RetrySleepTime), + retry.MaxDelay(m.ComCfg.MaxRetrySleepTime), + retry.Attempts(m.ComCfg.MaxRetryTimes), + ); err != nil { m.logger.Debug( "failed to query the BTC header chain tip", zap.Error(err)) @@ -187,7 +212,7 @@ func (m *Monitor) queryBTCHeaderChainTipWithRetry() (*btclctypes.QueryTipRespons func (m *Monitor) queryContainsBTCBlockWithRetry(blockHash *chainhash.Hash) (*btclctypes.QueryContainsBytesResponse, error) { var containsBTCBlockRes btclctypes.QueryContainsBytesResponse - if err := retry.Do(m.ComCfg.RetrySleepTime, m.ComCfg.MaxRetrySleepTime, func() error { + if err := retrywrap.Do(func() error { res, err := m.BBNQuerier.ContainsBTCBlock(blockHash) if err != nil { return err @@ -195,7 +220,11 @@ func (m *Monitor) queryContainsBTCBlockWithRetry(blockHash *chainhash.Hash) (*bt containsBTCBlockRes = *res return nil - }); err != nil { + }, + retry.Delay(m.ComCfg.RetrySleepTime), + retry.MaxDelay(m.ComCfg.MaxRetrySleepTime), + retry.Attempts(m.ComCfg.MaxRetryTimes), + ); err != nil { m.logger.Debug( "failed to query the contains BTC block", zap.Error(err)) diff --git a/reporter/reporter.go b/reporter/reporter.go index 5c2f757c..4590f9d8 100644 --- a/reporter/reporter.go +++ b/reporter/reporter.go @@ -3,12 +3,13 @@ package reporter import ( "encoding/hex" "fmt" + "github.com/babylonlabs-io/vigilante/retrywrap" notifier "github.com/lightningnetwork/lnd/chainntnfs" "sync" "time" + "github.com/avast/retry-go/v4" "github.com/babylonlabs-io/babylon/btctxformatter" - "github.com/babylonlabs-io/babylon/types/retry" btcctypes "github.com/babylonlabs-io/babylon/x/btccheckpoint/types" "github.com/babylonlabs-io/vigilante/btcclient" "github.com/babylonlabs-io/vigilante/config" @@ -57,10 +58,13 @@ func New( btccParamsRes *btcctypes.QueryParamsResponse err error ) - err = retry.Do(retrySleepTime, maxRetrySleepTime, func() error { + err = retrywrap.Do(func() error { btccParamsRes, err = babylonClient.BTCCheckpointParams() return err - }) + }, + retry.Delay(retrySleepTime), + retry.MaxDelay(maxRetrySleepTime), + ) if err != nil { return nil, fmt.Errorf("failed to get BTC Checkpoint parameters: %w", err) } diff --git a/reporter/utils.go b/reporter/utils.go index ce1ef528..efa5c236 100644 --- a/reporter/utils.go +++ b/reporter/utils.go @@ -3,11 +3,12 @@ package reporter import ( "context" "fmt" + "github.com/babylonlabs-io/vigilante/retrywrap" "strconv" pv "github.com/cosmos/relayer/v2/relayer/provider" - "github.com/babylonlabs-io/babylon/types/retry" + "github.com/avast/retry-go/v4" btcctypes "github.com/babylonlabs-io/babylon/x/btccheckpoint/types" btclctypes "github.com/babylonlabs-io/babylon/x/btclightclient/types" "github.com/babylonlabs-io/vigilante/types" @@ -33,10 +34,13 @@ func (r *Reporter) getHeaderMsgsToSubmit(signer string, ibs []*types.IndexedBloc for i, header := range ibs { blockHash := header.BlockHash() var res *btclctypes.QueryContainsBytesResponse - err = retry.Do(r.retrySleepTime, r.maxRetrySleepTime, func() error { + err = retrywrap.Do(func() error { res, err = r.babylonClient.ContainsBTCBlock(&blockHash) return err - }) + }, + retry.Delay(r.retrySleepTime), + retry.MaxDelay(r.maxRetrySleepTime), + ) if err != nil { return nil, err } @@ -57,7 +61,7 @@ func (r *Reporter) getHeaderMsgsToSubmit(signer string, ibs []*types.IndexedBloc blockChunks := chunkBy(ibsToSubmit, int(r.Cfg.MaxHeadersInMsg)) - headerMsgsToSubmit := []*btclctypes.MsgInsertHeaders{} + headerMsgsToSubmit := make([]*btclctypes.MsgInsertHeaders, 0, len(blockChunks)) for _, ibChunk := range blockChunks { msgInsertHeaders := types.NewMsgInsertHeaders(signer, ibChunk) @@ -69,14 +73,17 @@ func (r *Reporter) getHeaderMsgsToSubmit(signer string, ibs []*types.IndexedBloc func (r *Reporter) submitHeaderMsgs(msg *btclctypes.MsgInsertHeaders) error { // submit the headers - err := retry.Do(r.retrySleepTime, r.maxRetrySleepTime, func() error { + err := retrywrap.Do(func() error { res, err := r.babylonClient.InsertHeaders(context.Background(), msg) if err != nil { return err } r.logger.Infof("Successfully submitted %d headers to Babylon with response code %v", len(msg.Headers), res.Code) return nil - }) + }, + retry.Delay(r.retrySleepTime), + retry.MaxDelay(r.maxRetrySleepTime), + ) if err != nil { r.metrics.FailedHeadersCounter.Add(float64(len(msg.Headers))) return fmt.Errorf("failed to submit headers: %w", err) diff --git a/retrywrap/retry.go b/retrywrap/retry.go new file mode 100644 index 00000000..6ace51b0 --- /dev/null +++ b/retrywrap/retry.go @@ -0,0 +1,62 @@ +package retrywrap + +import ( + "errors" + "github.com/avast/retry-go/v4" + btcctypes "github.com/babylonlabs-io/babylon/x/btccheckpoint/types" + btclctypes "github.com/babylonlabs-io/babylon/x/btclightclient/types" + checkpointingtypes "github.com/babylonlabs-io/babylon/x/checkpointing/types" +) + +// unrecoverableErrors is a list of errors which are unsafe and should not be retried. +var unrecoverableErrors = []error{ + btclctypes.ErrHeaderParentDoesNotExist, + btclctypes.ErrChainWithNotEnoughWork, + btclctypes.ErrInvalidHeader, + btcctypes.ErrProvidedHeaderDoesNotHaveAncestor, + btcctypes.ErrInvalidHeader, + btcctypes.ErrNoCheckpointsForPreviousEpoch, + btcctypes.ErrInvalidCheckpointProof, + checkpointingtypes.ErrBlsPrivKeyDoesNotExist, +} + +// expectedErrors is a list of errors which can safely be ignored and should not be retried. +var expectedErrors = []error{ + btcctypes.ErrDuplicatedSubmission, + btcctypes.ErrInvalidHeader, +} + +func containsErr(errs []error, err error) bool { + for _, e := range errs { + if errors.Is(err, e) { + return true + } + } + + return false +} + +// Do - executes a retryable function with customizable retry behavior using retry-go. +// It retries the function unless the error is considered unrecoverable or expected. +// Unrecoverable errors will stop further retries and return the error. +// Expected errors are ignored and considered as successful executions. +func Do(retryableFunc retry.RetryableFunc, opts ...retry.Option) error { + opt := retry.RetryIf(func(err error) bool { + // Don't retry on unrecoverable errors + if containsErr(unrecoverableErrors, err) || containsErr(expectedErrors, err) { + return false + } + + return true // Retry on all other errors + }) + + opts = append(opts, opt) + + err := retry.Do(retryableFunc, opts...) + if containsErr(expectedErrors, err) { + // Return nil for expected errors to ignore them + return nil + } + + return err +} diff --git a/retrywrap/retry_test.go b/retrywrap/retry_test.go new file mode 100644 index 00000000..b0e9be01 --- /dev/null +++ b/retrywrap/retry_test.go @@ -0,0 +1,63 @@ +package retrywrap + +import ( + "errors" + "github.com/avast/retry-go/v4" + btcctypes "github.com/babylonlabs-io/babylon/x/btccheckpoint/types" + btclctypes "github.com/babylonlabs-io/babylon/x/btclightclient/types" + "testing" +) + +func TestWrapDo(t *testing.T) { + tests := []struct { + name string + retryableErr error + expectedErr error + attempts int + }{ + { + name: "Retry on regular error", + retryableErr: errors.New("regular error"), + expectedErr: errors.New("regular error"), + attempts: 3, + }, + + { + name: "No retry on unrecoverable error", + retryableErr: btclctypes.ErrInvalidHeader, + expectedErr: btclctypes.ErrInvalidHeader, + attempts: 1, + }, + { + name: "Ignore expected error", + retryableErr: btcctypes.ErrDuplicatedSubmission, + expectedErr: nil, + attempts: 1, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + var attempts int + retryableFunc := func() error { + attempts++ + return tt.retryableErr + } + + err := Do( + retryableFunc, + retry.Attempts(uint(tt.attempts)), + retry.LastErrorOnly(true), + ) + + if err != nil && err.Error() != tt.expectedErr.Error() { + t.Errorf("WrapDo() error = %v, want %v", err, tt.expectedErr) + } + + if attempts != tt.attempts { + t.Errorf("WrapDo() attempts = %v, want %v", attempts, tt.attempts) + } + }) + } +} diff --git a/submitter/submitter.go b/submitter/submitter.go index da1fd744..82f467fb 100644 --- a/submitter/submitter.go +++ b/submitter/submitter.go @@ -3,11 +3,12 @@ package submitter import ( "encoding/hex" "fmt" + "github.com/babylonlabs-io/vigilante/retrywrap" "sync" "time" + "github.com/avast/retry-go/v4" "github.com/babylonlabs-io/babylon/btctxformatter" - "github.com/babylonlabs-io/babylon/types/retry" btcctypes "github.com/babylonlabs-io/babylon/x/btccheckpoint/types" sdk "github.com/cosmos/cosmos-sdk/types" "go.uber.org/zap" @@ -40,7 +41,7 @@ func New( btcWallet btcclient.BTCWallet, queryClient BabylonQueryClient, submitterAddr sdk.AccAddress, - retrySleepTime, maxRetrySleepTime time.Duration, + retrySleepTime, maxRetrySleepTime time.Duration, maxRetryTimes uint, submitterMetrics *metrics.SubmitterMetrics, ) (*Submitter, error) { logger := parentLogger.With(zap.String("module", "submitter")) @@ -48,10 +49,14 @@ func New( btccheckpointParams *btcctypes.QueryParamsResponse err error ) - err = retry.Do(retrySleepTime, maxRetrySleepTime, func() error { + err = retrywrap.Do(func() error { btccheckpointParams, err = queryClient.BTCCheckpointParams() return err - }) + }, + retry.Delay(retrySleepTime), + retry.MaxDelay(maxRetrySleepTime), + retry.Attempts(maxRetryTimes), + ) if err != nil { return nil, fmt.Errorf("failed to get checkpoint params: %w", err) }