From aaef57e07a42bccd45561c53a047e9ebafef7036 Mon Sep 17 00:00:00 2001 From: Cirrus Gai Date: Wed, 20 Nov 2024 16:43:11 +0800 Subject: [PATCH] chore: Replace fast sync with batch processing (#132) Closes #118. Notable changes: * remove fast sync completely * introduce batch process for catching up * introduce signature submission interval to config * introduce batch submission size to config --- clientcontroller/babylon/babylon.go | 121 ++++++- finality-provider/config/config.go | 114 +++--- finality-provider/config/poller.go | 4 +- finality-provider/service/fastsync.go | 99 ----- finality-provider/service/fastsync_test.go | 123 ------- finality-provider/service/fp_instance.go | 339 +++++------------- finality-provider/service/fp_instance_test.go | 7 +- itest/babylon/babylon_e2e_test.go | 5 +- 8 files changed, 266 insertions(+), 546 deletions(-) delete mode 100644 finality-provider/service/fastsync.go delete mode 100644 finality-provider/service/fastsync_test.go diff --git a/clientcontroller/babylon/babylon.go b/clientcontroller/babylon/babylon.go index 6f9beabd..88acbf22 100644 --- a/clientcontroller/babylon/babylon.go +++ b/clientcontroller/babylon/babylon.go @@ -18,6 +18,7 @@ import ( "github.com/btcsuite/btcd/btcec/v2/schnorr" "github.com/btcsuite/btcd/btcutil" "github.com/btcsuite/btcd/chaincfg" + cmtcrypto "github.com/cometbft/cometbft/proto/tendermint/crypto" sdk "github.com/cosmos/cosmos-sdk/types" sdkquery "github.com/cosmos/cosmos-sdk/types/query" sttypes "github.com/cosmos/cosmos-sdk/x/staking/types" @@ -145,7 +146,125 @@ func (bc *BabylonController) RegisterFinalityProvider( return &types.TxResponse{TxHash: res.TxHash}, nil } -func (bc *BabylonController) QueryFinalityProviderSlashedOrJailed(fpPk *btcec.PublicKey) (slashed bool, jailed bool, err error) { +// CommitPubRandList commits a list of Schnorr public randomness via a MsgCommitPubRand to Babylon +// it returns tx hash and error +func (bc *BabylonController) CommitPubRandList( + fpPk *btcec.PublicKey, + startHeight uint64, + numPubRand uint64, + commitment []byte, + sig *schnorr.Signature, +) (*types.TxResponse, error) { + msg := &finalitytypes.MsgCommitPubRandList{ + Signer: bc.MustGetTxSigner(), + FpBtcPk: bbntypes.NewBIP340PubKeyFromBTCPK(fpPk), + StartHeight: startHeight, + NumPubRand: numPubRand, + Commitment: commitment, + Sig: bbntypes.NewBIP340SignatureFromBTCSig(sig), + } + + unrecoverableErrs := []*sdkErr.Error{ + finalitytypes.ErrInvalidPubRand, + finalitytypes.ErrTooFewPubRand, + finalitytypes.ErrNoPubRandYet, + btcstakingtypes.ErrFpNotFound, + } + + res, err := bc.reliablySendMsg(msg, emptyErrs, unrecoverableErrs) + if err != nil { + return nil, err + } + + return &types.TxResponse{TxHash: res.TxHash}, nil +} + +// SubmitFinalitySig submits the finality signature via a MsgAddVote to Babylon +func (bc *BabylonController) SubmitFinalitySig( + fpPk *btcec.PublicKey, + block *types.BlockInfo, + pubRand *btcec.FieldVal, + proof []byte, // TODO: have a type for proof + sig *btcec.ModNScalar, +) (*types.TxResponse, error) { + return bc.SubmitBatchFinalitySigs( + fpPk, []*types.BlockInfo{block}, []*btcec.FieldVal{pubRand}, + [][]byte{proof}, []*btcec.ModNScalar{sig}, + ) +} + +// SubmitBatchFinalitySigs submits a batch of finality signatures to Babylon +func (bc *BabylonController) SubmitBatchFinalitySigs( + fpPk *btcec.PublicKey, + blocks []*types.BlockInfo, + pubRandList []*btcec.FieldVal, + proofList [][]byte, + sigs []*btcec.ModNScalar, +) (*types.TxResponse, error) { + if len(blocks) != len(sigs) { + return nil, fmt.Errorf("the number of blocks %v should match the number of finality signatures %v", len(blocks), len(sigs)) + } + + msgs := make([]sdk.Msg, 0, len(blocks)) + for i, b := range blocks { + cmtProof := cmtcrypto.Proof{} + if err := cmtProof.Unmarshal(proofList[i]); err != nil { + return nil, err + } + + msg := &finalitytypes.MsgAddFinalitySig{ + Signer: bc.MustGetTxSigner(), + FpBtcPk: bbntypes.NewBIP340PubKeyFromBTCPK(fpPk), + BlockHeight: b.Height, + PubRand: bbntypes.NewSchnorrPubRandFromFieldVal(pubRandList[i]), + Proof: &cmtProof, + BlockAppHash: b.Hash, + FinalitySig: bbntypes.NewSchnorrEOTSSigFromModNScalar(sigs[i]), + } + msgs = append(msgs, msg) + } + + unrecoverableErrs := []*sdkErr.Error{ + finalitytypes.ErrInvalidFinalitySig, + finalitytypes.ErrPubRandNotFound, + btcstakingtypes.ErrFpAlreadySlashed, + } + + res, err := bc.reliablySendMsgs(msgs, emptyErrs, unrecoverableErrs) + if err != nil { + return nil, err + } + + if res == nil { + return &types.TxResponse{}, nil + } + + return &types.TxResponse{TxHash: res.TxHash}, nil +} + +// UnjailFinalityProvider sends an unjail transaction to the consumer chain +func (bc *BabylonController) UnjailFinalityProvider(fpPk *btcec.PublicKey) (*types.TxResponse, error) { + msg := &finalitytypes.MsgUnjailFinalityProvider{ + Signer: bc.MustGetTxSigner(), + FpBtcPk: bbntypes.NewBIP340PubKeyFromBTCPK(fpPk), + } + + unrecoverableErrs := []*sdkErr.Error{ + btcstakingtypes.ErrFpNotFound, + btcstakingtypes.ErrFpNotJailed, + btcstakingtypes.ErrFpAlreadySlashed, + } + + res, err := bc.reliablySendMsg(msg, emptyErrs, unrecoverableErrs) + if err != nil { + return nil, err + } + + return &types.TxResponse{TxHash: res.TxHash}, nil +} + +// QueryFinalityProviderSlashedOrJailed - returns if the fp has been slashed, jailed, err +func (bc *BabylonController) QueryFinalityProviderSlashedOrJailed(fpPk *btcec.PublicKey) (bool, bool, error) { fpPubKey := bbntypes.NewBIP340PubKeyFromBTCPK(fpPk) res, err := bc.bbnClient.QueryClient.FinalityProvider(fpPubKey.MarshalHex()) if err != nil { diff --git a/finality-provider/config/config.go b/finality-provider/config/config.go index 62683466..6cdd267e 100644 --- a/finality-provider/config/config.go +++ b/finality-provider/config/config.go @@ -18,27 +18,26 @@ import ( ) const ( - defaultChainType = "babylon" - defaultLogLevel = zapcore.InfoLevel - defaultLogDirname = "logs" - defaultLogFilename = "fpd.log" - defaultFinalityProviderKeyName = "finality-provider" - DefaultRPCPort = 12581 - defaultConfigFileName = "fpd.conf" - defaultNumPubRand = 70000 // support running of 1 week with block production time as 10s - defaultNumPubRandMax = 100000 - defaultMinRandHeightGap = 35000 - defaultStatusUpdateInterval = 20 * time.Second - defaultRandomInterval = 30 * time.Second - defaultSubmitRetryInterval = 1 * time.Second - defaultFastSyncInterval = 10 * time.Second - defaultSyncFpStatusInterval = 30 * time.Second - defaultFastSyncLimit = 10 - defaultFastSyncGap = 3 - defaultMaxSubmissionRetries = 20 - defaultBitcoinNetwork = "signet" - defaultDataDirname = "data" - defaultMaxNumFinalityProviders = 3 + defaultChainType = "babylon" + defaultLogLevel = zapcore.InfoLevel + defaultLogDirname = "logs" + defaultLogFilename = "fpd.log" + defaultFinalityProviderKeyName = "finality-provider" + DefaultRPCPort = 12581 + defaultConfigFileName = "fpd.conf" + defaultNumPubRand = 70000 // support running of 1 week with block production time as 10s + defaultNumPubRandMax = 100000 + defaultMinRandHeightGap = 35000 + defaultBatchSubmissionSize = 1000 + defaultStatusUpdateInterval = 20 * time.Second + defaultRandomInterval = 30 * time.Second + defaultSubmitRetryInterval = 1 * time.Second + defaultSyncFpStatusInterval = 30 * time.Second + defaultSignatureSubmissionInterval = 1 * time.Second + defaultMaxSubmissionRetries = 20 + defaultBitcoinNetwork = "signet" + defaultDataDirname = "data" + defaultMaxNumFinalityProviders = 3 ) var ( @@ -55,21 +54,21 @@ var ( // Config is the main config for the fpd cli command type Config struct { - LogLevel string `long:"loglevel" description:"Logging level for all subsystems" choice:"trace" choice:"debug" choice:"info" choice:"warn" choice:"error" choice:"fatal"` - ChainType string `long:"chaintype" description:"the type of the consumer chain (babylon/OPStackL2/wasm)"` - NumPubRand uint64 `long:"numPubRand" description:"The number of Schnorr public randomness for each commitment"` - NumPubRandMax uint64 `long:"numpubrandmax" description:"The upper bound of the number of Schnorr public randomness for each commitment"` - MinRandHeightGap uint64 `long:"minrandheightgap" description:"The minimum gap between the last committed rand height and the current Babylon block height"` - StatusUpdateInterval time.Duration `long:"statusupdateinterval" description:"The interval between each update of finality-provider status"` - RandomnessCommitInterval time.Duration `long:"randomnesscommitinterval" description:"The interval between each attempt to commit public randomness"` - SubmissionRetryInterval time.Duration `long:"submissionretryinterval" description:"The interval between each attempt to submit finality signature or public randomness after a failure"` - MaxSubmissionRetries uint64 `long:"maxsubmissionretries" description:"The maximum number of retries to submit finality signature or public randomness"` - FastSyncInterval time.Duration `long:"fastsyncinterval" description:"The interval between each try of fast sync, which is disabled if the value is 0"` - FastSyncLimit uint64 `long:"fastsynclimit" description:"The maximum number of blocks to catch up for each fast sync"` - FastSyncGap uint64 `long:"fastsyncgap" description:"The block gap that will trigger the fast sync"` - EOTSManagerAddress string `long:"eotsmanageraddress" description:"The address of the remote EOTS manager; Empty if the EOTS manager is running locally"` - MaxNumFinalityProviders uint32 `long:"maxnumfinalityproviders" description:"The maximum number of finality-provider instances running concurrently within the daemon"` - SyncFpStatusInterval time.Duration `long:"syncfpstatusinterval" description:"The duration of time that it should sync FP status with the client blockchain"` + LogLevel string `long:"loglevel" description:"Logging level for all subsystems" choice:"trace" choice:"debug" choice:"info" choice:"warn" choice:"error" choice:"fatal"` + // ChainType and ChainID (if any) of the chain config identify a consumer chain + ChainType string `long:"chaintype" description:"the type of the consumer chain" choice:"babylon"` + NumPubRand uint32 `long:"numPubRand" description:"The number of Schnorr public randomness for each commitment"` + NumPubRandMax uint32 `long:"numpubrandmax" description:"The upper bound of the number of Schnorr public randomness for each commitment"` + MinRandHeightGap uint32 `long:"minrandheightgap" description:"The minimum gap between the last committed rand height and the current Babylon block height"` + MaxSubmissionRetries uint32 `long:"maxsubmissionretries" description:"The maximum number of retries to submit finality signature or public randomness"` + EOTSManagerAddress string `long:"eotsmanageraddress" description:"The address of the remote EOTS manager; Empty if the EOTS manager is running locally"` + BatchSubmissionSize uint32 `long:"batchsubmissionsize" description:"The size of a batch in one submission"` + MaxNumFinalityProviders uint32 `long:"maxnumfinalityproviders" description:"The maximum number of finality-provider instances running concurrently within the daemon"` + StatusUpdateInterval time.Duration `long:"statusupdateinterval" description:"The interval between each update of finality-provider status"` + RandomnessCommitInterval time.Duration `long:"randomnesscommitinterval" description:"The interval between each attempt to commit public randomness"` + SubmissionRetryInterval time.Duration `long:"submissionretryinterval" description:"The interval between each attempt to submit finality signature or public randomness after a failure"` + SyncFpStatusInterval time.Duration `long:"syncfpstatusinterval" description:"The duration of time that it should sync FP status with the client blockchain"` + SignatureSubmissionInterval time.Duration `long:"signaturesubmissioninterval" description:"The interval between each finality signature(s) submission"` BitcoinNetwork string `long:"bitcoinnetwork" description:"Bitcoin network to run on" choise:"mainnet" choice:"regtest" choice:"testnet" choice:"simnet" choice:"signet"` @@ -96,28 +95,27 @@ func DefaultConfigWithHome(homePath string) Config { bbnCfg.KeyDirectory = homePath pollerCfg := DefaultChainPollerConfig() cfg := Config{ - ChainType: defaultChainType, - LogLevel: defaultLogLevel.String(), - DatabaseConfig: DefaultDBConfigWithHomePath(homePath), - BabylonConfig: &bbnCfg, - PollerConfig: &pollerCfg, - NumPubRand: defaultNumPubRand, - NumPubRandMax: defaultNumPubRandMax, - MinRandHeightGap: defaultMinRandHeightGap, - StatusUpdateInterval: defaultStatusUpdateInterval, - RandomnessCommitInterval: defaultRandomInterval, - SubmissionRetryInterval: defaultSubmitRetryInterval, - FastSyncInterval: defaultFastSyncInterval, - FastSyncLimit: defaultFastSyncLimit, - FastSyncGap: defaultFastSyncGap, - MaxSubmissionRetries: defaultMaxSubmissionRetries, - BitcoinNetwork: defaultBitcoinNetwork, - BTCNetParams: defaultBTCNetParams, - EOTSManagerAddress: defaultEOTSManagerAddress, - RpcListener: DefaultRpcListener, - MaxNumFinalityProviders: defaultMaxNumFinalityProviders, - Metrics: metrics.DefaultFpConfig(), - SyncFpStatusInterval: defaultSyncFpStatusInterval, + ChainType: defaultChainType, + LogLevel: defaultLogLevel.String(), + DatabaseConfig: DefaultDBConfigWithHomePath(homePath), + BabylonConfig: &bbnCfg, + PollerConfig: &pollerCfg, + NumPubRand: defaultNumPubRand, + NumPubRandMax: defaultNumPubRandMax, + MinRandHeightGap: defaultMinRandHeightGap, + BatchSubmissionSize: defaultBatchSubmissionSize, + StatusUpdateInterval: defaultStatusUpdateInterval, + RandomnessCommitInterval: defaultRandomInterval, + SubmissionRetryInterval: defaultSubmitRetryInterval, + SignatureSubmissionInterval: defaultSignatureSubmissionInterval, + MaxSubmissionRetries: defaultMaxSubmissionRetries, + BitcoinNetwork: defaultBitcoinNetwork, + BTCNetParams: defaultBTCNetParams, + EOTSManagerAddress: defaultEOTSManagerAddress, + RpcListener: DefaultRpcListener, + Metrics: metrics.DefaultFpConfig(), + SyncFpStatusInterval: defaultSyncFpStatusInterval, + MaxNumFinalityProviders: defaultMaxNumFinalityProviders, } if err := cfg.Validate(); err != nil { diff --git a/finality-provider/config/poller.go b/finality-provider/config/poller.go index 20f28641..d0e1e0f1 100644 --- a/finality-provider/config/poller.go +++ b/finality-provider/config/poller.go @@ -4,13 +4,13 @@ import "time" var ( defaultBufferSize = uint32(1000) - defaultPollingInterval = 20 * time.Second + defaultPollingInterval = 1 * time.Second defaultStaticStartHeight = uint64(1) ) type ChainPollerConfig struct { BufferSize uint32 `long:"buffersize" description:"The maximum number of Babylon blocks that can be stored in the buffer"` - PollInterval time.Duration `long:"pollinterval" description:"The interval between each polling of consumer chain blocks"` + PollInterval time.Duration `long:"pollinterval" description:"The interval between each polling of blocks; the value should be set depending on the block production time but could be set smaller for quick catching up"` StaticChainScanningStartHeight uint64 `long:"staticchainscanningstartheight" description:"The static height from which we start polling the chain"` AutoChainScanningMode bool `long:"autochainscanningmode" description:"Automatically discover the height from which to start polling the chain"` } diff --git a/finality-provider/service/fastsync.go b/finality-provider/service/fastsync.go deleted file mode 100644 index dc0807e2..00000000 --- a/finality-provider/service/fastsync.go +++ /dev/null @@ -1,99 +0,0 @@ -package service - -import ( - "fmt" - - "go.uber.org/zap" - - "github.com/babylonlabs-io/finality-provider/types" -) - -type FastSyncResult struct { - Responses []*types.TxResponse - SyncedHeight uint64 - LastProcessedHeight uint64 -} - -// FastSync attempts to send a batch of finality signatures -// from the maximum of the last voted height and the last finalized height -// to the current height -func (fp *FinalityProviderInstance) FastSync(startHeight, endHeight uint64) (*FastSyncResult, error) { - if fp.inSync.Swap(true) { - return nil, fmt.Errorf("the finality-provider has already been in fast sync") - } - defer fp.inSync.Store(false) - - if startHeight > endHeight { - return nil, fmt.Errorf("the start height %v should not be higher than the end height %v", - startHeight, endHeight) - } - - var syncedHeight uint64 - responses := make([]*types.TxResponse, 0) - // we may need several rounds to catch-up as we need to limit - // the catch-up distance for each round to avoid memory overflow - for startHeight <= endHeight { - blocks, err := fp.consumerCon.QueryBlocks(startHeight, endHeight, fp.cfg.FastSyncLimit) - if err != nil { - return nil, err - } - - if len(blocks) < 1 { - break - } - - startHeight = blocks[len(blocks)-1].Height + 1 - - // Note: not all the blocks in the range will have votes cast - // due to lack of voting power or public randomness, so we may - // have gaps during sync - catchUpBlocks := make([]*types.BlockInfo, 0, len(blocks)) - for _, b := range blocks { - // check whether the block has been processed before - if fp.hasProcessed(b.Height) { - continue - } - // check whether the finality provider has voting power - hasVp, err := fp.hasVotingPower(b.Height) - if err != nil { - return nil, err - } - if !hasVp { - fp.metrics.IncrementFpTotalBlocksWithoutVotingPower(fp.GetBtcPkHex()) - continue - } - // all good, add the block for catching up - catchUpBlocks = append(catchUpBlocks, b) - } - - if len(catchUpBlocks) < 1 { - continue - } - - syncedHeight = catchUpBlocks[len(catchUpBlocks)-1].Height - - res, err := fp.SubmitBatchFinalitySignatures(catchUpBlocks) - if err != nil { - return nil, err - } - fp.metrics.AddToFpTotalVotedBlocks(fp.GetBtcPkHex(), float64(len(catchUpBlocks))) - - responses = append(responses, res) - - fp.logger.Debug( - "the finality-provider is catching up by sending finality signatures in a batch", - zap.String("pk", fp.GetBtcPkHex()), - zap.Uint64("start_height", catchUpBlocks[0].Height), - zap.Uint64("synced_height", syncedHeight), - ) - } - - // update the processed height - fp.MustSetLastProcessedHeight(syncedHeight) - - return &FastSyncResult{ - Responses: responses, - SyncedHeight: syncedHeight, - LastProcessedHeight: fp.GetLastProcessedHeight(), - }, nil -} diff --git a/finality-provider/service/fastsync_test.go b/finality-provider/service/fastsync_test.go deleted file mode 100644 index a2001df5..00000000 --- a/finality-provider/service/fastsync_test.go +++ /dev/null @@ -1,123 +0,0 @@ -package service_test - -import ( - "math/rand" - "testing" - - "github.com/babylonlabs-io/babylon/testutil/datagen" - "github.com/golang/mock/gomock" - "github.com/stretchr/testify/require" - - "github.com/babylonlabs-io/finality-provider/testutil" - "github.com/babylonlabs-io/finality-provider/types" -) - -// FuzzFastSync_SufficientRandomness tests a case where we have sufficient -// randomness and voting power when the finality provider enters fast-sync -// it is expected that the finality provider could catch up to the current -// height through fast-sync -func FuzzFastSync_SufficientRandomness(f *testing.F) { - testutil.AddRandomSeedsToFuzzer(f, 10) - f.Fuzz(func(t *testing.T, seed int64) { - r := rand.New(rand.NewSource(seed)) - - randomStartingHeight := uint64(r.Int63n(100) + 1) - finalizedHeight := randomStartingHeight + uint64(r.Int63n(10)+2) - currentHeight := finalizedHeight + uint64(r.Int63n(10)+1) - mockConsumerController := testutil.PrepareMockedConsumerController(t, r, randomStartingHeight, currentHeight) - mockBabylonController := testutil.PrepareMockedBabylonController(t) - mockConsumerController.EXPECT().QueryLatestFinalizedBlock().Return(nil, nil).AnyTimes() - _, fpIns, cleanUp := startFinalityProviderAppWithRegisteredFp(t, r, mockBabylonController, mockConsumerController, randomStartingHeight) - defer cleanUp() - - // commit pub rand - mockConsumerController.EXPECT().QueryLastPublicRandCommit(gomock.Any()).Return(nil, nil).Times(1) - _, err := fpIns.CommitPubRand(randomStartingHeight) - require.NoError(t, err) - - mockConsumerController.EXPECT().QueryFinalityProviderHasPower(fpIns.GetBtcPk(), gomock.Any()). - Return(true, nil).AnyTimes() - // the last committed height is higher than the current height - // to make sure the randomness is sufficient - lastCommittedHeight := randomStartingHeight + testutil.TestPubRandNum - lastCommittedPubRand := &types.PubRandCommit{ - StartHeight: lastCommittedHeight, - NumPubRand: 1000, - Commitment: datagen.GenRandomByteArray(r, 32), - } - mockConsumerController.EXPECT().QueryLastPublicRandCommit(gomock.Any()).Return(lastCommittedPubRand, nil).AnyTimes() - - catchUpBlocks := testutil.GenBlocks(r, finalizedHeight+1, currentHeight) - expectedTxHash := testutil.GenRandomHexStr(r, 32) - finalizedBlock := &types.BlockInfo{Height: finalizedHeight, Hash: testutil.GenRandomByteArray(r, 32)} - mockConsumerController.EXPECT().QueryLatestFinalizedBlock().Return(finalizedBlock, nil).AnyTimes() - mockConsumerController.EXPECT().QueryBlocks(finalizedHeight+1, currentHeight, uint64(10)). - Return(catchUpBlocks, nil) - mockConsumerController.EXPECT().SubmitBatchFinalitySigs(fpIns.GetBtcPk(), catchUpBlocks, gomock.Any(), gomock.Any(), gomock.Any()). - Return(&types.TxResponse{TxHash: expectedTxHash}, nil).AnyTimes() - result, err := fpIns.FastSync(finalizedHeight+1, currentHeight) - require.NoError(t, err) - require.NotNil(t, result) - require.Equal(t, expectedTxHash, result.Responses[0].TxHash) - require.Equal(t, currentHeight, fpIns.GetLastVotedHeight()) - require.Equal(t, currentHeight, fpIns.GetLastProcessedHeight()) - }) -} - -// FuzzFastSync_NoRandomness tests a case where we have insufficient -// randomness but with voting power when the finality provider enters fast-sync -// it is expected that the finality provider could catch up to the last -// committed height -func FuzzFastSync_NoRandomness(f *testing.F) { - testutil.AddRandomSeedsToFuzzer(f, 10) - f.Fuzz(func(t *testing.T, seed int64) { - r := rand.New(rand.NewSource(seed)) - - randomStartingHeight := uint64(r.Int63n(100) + 100) - finalizedHeight := randomStartingHeight + uint64(r.Int63n(10)+2) - currentHeight := finalizedHeight + uint64(r.Int63n(10)+1) - mockConsumerController := testutil.PrepareMockedConsumerController(t, r, randomStartingHeight, currentHeight) - mockBabylonController := testutil.PrepareMockedBabylonController(t) - mockConsumerController.EXPECT().QueryLatestFinalizedBlock().Return(nil, nil).AnyTimes() - _, fpIns, cleanUp := startFinalityProviderAppWithRegisteredFp(t, r, mockBabylonController, mockConsumerController, randomStartingHeight) - defer cleanUp() - - // commit pub rand - mockConsumerController.EXPECT().QueryLastPublicRandCommit(gomock.Any()).Return(nil, nil).Times(1) - _, err := fpIns.CommitPubRand(randomStartingHeight) - require.NoError(t, err) - - // the last height with pub rand is a random value inside [finalizedHeight+1, currentHeight] - lastHeightWithPubRand := uint64(rand.Intn(int(currentHeight)-int(finalizedHeight))) + finalizedHeight + 1 - for i := randomStartingHeight; i <= currentHeight; i++ { - if i <= lastHeightWithPubRand { - mockConsumerController.EXPECT().QueryFinalityProviderHasPower(fpIns.GetBtcPk(), i). - Return(true, nil).AnyTimes() - } else { - mockConsumerController.EXPECT().QueryFinalityProviderHasPower(fpIns.GetBtcPk(), i). - Return(false, nil).AnyTimes() - } - } - lastCommittedPubRand := &types.PubRandCommit{ - StartHeight: lastHeightWithPubRand - 10, - NumPubRand: 10 + 1, - Commitment: datagen.GenRandomByteArray(r, 32), - } - mockConsumerController.EXPECT().QueryLastPublicRandCommit(gomock.Any()).Return(lastCommittedPubRand, nil).AnyTimes() - - catchUpBlocks := testutil.GenBlocks(r, finalizedHeight+1, currentHeight) - expectedTxHash := testutil.GenRandomHexStr(r, 32) - finalizedBlock := &types.BlockInfo{Height: finalizedHeight, Hash: testutil.GenRandomByteArray(r, 32)} - mockConsumerController.EXPECT().QueryLatestFinalizedBlock().Return(finalizedBlock, nil).AnyTimes() - mockConsumerController.EXPECT().QueryBlocks(finalizedHeight+1, currentHeight, uint64(10)). - Return(catchUpBlocks, nil) - mockConsumerController.EXPECT().SubmitBatchFinalitySigs(fpIns.GetBtcPk(), catchUpBlocks[:lastHeightWithPubRand-finalizedHeight], gomock.Any(), gomock.Any(), gomock.Any()). - Return(&types.TxResponse{TxHash: expectedTxHash}, nil).AnyTimes() - result, err := fpIns.FastSync(finalizedHeight+1, currentHeight) - require.NoError(t, err) - require.NotNil(t, result) - require.Equal(t, expectedTxHash, result.Responses[0].TxHash) - require.Equal(t, lastHeightWithPubRand, fpIns.GetLastVotedHeight()) - require.Equal(t, lastHeightWithPubRand, fpIns.GetLastProcessedHeight()) - }) -} diff --git a/finality-provider/service/fp_instance.go b/finality-provider/service/fp_instance.go index cdec8bc9..8d07f3d0 100644 --- a/finality-provider/service/fp_instance.go +++ b/finality-provider/service/fp_instance.go @@ -10,7 +10,6 @@ import ( "github.com/avast/retry-go/v4" bbntypes "github.com/babylonlabs-io/babylon/types" - bstypes "github.com/babylonlabs-io/babylon/x/btcstaking/types" ftypes "github.com/babylonlabs-io/babylon/x/finality/types" "github.com/btcsuite/btcd/btcec/v2" "github.com/cosmos/relayer/v2/relayer/provider" @@ -45,8 +44,7 @@ type FinalityProviderInstance struct { // passphrase is used to unlock private keys passphrase string - laggingTargetChan chan uint64 - criticalErrChan chan<- *CriticalError + criticalErrChan chan<- *CriticalError isStarted *atomic.Bool inSync *atomic.Bool @@ -146,57 +144,30 @@ func (fp *FinalityProviderInstance) Start() error { fp.logger.Info("Starting finality-provider instance", zap.String("pk", fp.GetBtcPkHex())) - startHeight, err := fp.bootstrap() + startHeight, err := fp.getPollerStartingHeight() if err != nil { - return fmt.Errorf("failed to bootstrap the finality-provider %s: %w", fp.GetBtcPkHex(), err) + return fmt.Errorf("failed to get the start height: %w", err) } - fp.logger.Info("the finality-provider has been bootstrapped", + fp.logger.Info("starting the finality provider", zap.String("pk", fp.GetBtcPkHex()), zap.Uint64("height", startHeight)) poller := NewChainPoller(fp.logger, fp.cfg.PollerConfig, fp.cc, fp.consumerCon, fp.metrics) if err := poller.Start(startHeight); err != nil { - return fmt.Errorf("failed to start the poller: %w", err) + return fmt.Errorf("failed to start the poller with start height %d: %w", startHeight, err) } fp.poller = poller - - fp.laggingTargetChan = make(chan uint64, 1) - fp.quit = make(chan struct{}) - fp.wg.Add(1) go fp.finalitySigSubmissionLoop() fp.wg.Add(1) go fp.randomnessCommitmentLoop(startHeight) - fp.wg.Add(1) - go fp.checkLaggingLoop() return nil } -func (fp *FinalityProviderInstance) bootstrap() (uint64, error) { - latestBlockHeight, err := fp.getLatestBlockHeightWithRetry() - if err != nil { - return 0, err - } - - if fp.cfg.FastSyncInterval != 0 && fp.checkLagging(latestBlockHeight) { - _, err := fp.tryFastSync(latestBlockHeight) - if err != nil && !fpcc.IsExpected(err) { - return 0, err - } - } - - startHeight, err := fp.getPollerStartingHeight() - if err != nil { - return 0, err - } - - return startHeight, nil -} - func (fp *FinalityProviderInstance) Stop() error { if !fp.isStarted.Swap(false) { return fmt.Errorf("the finality-provider %s has already stopped", fp.GetBtcPkHex()) @@ -225,44 +196,7 @@ func (fp *FinalityProviderInstance) finalitySigSubmissionLoop() { for { select { - case targetBlock := <-fp.laggingTargetChan: - res, err := fp.tryFastSync(targetBlock) - fp.isLagging.Store(false) - if err != nil { - if errors.Is(err, bstypes.ErrFpAlreadySlashed) { - fp.reportCriticalErr(err) - continue - } - fp.logger.Debug( - "failed to sync up, will try again later", - zap.String("pk", fp.GetBtcPkHex()), - zap.Error(err), - ) - continue - } - // response might be nil if sync is not needed - if res != nil { - fp.logger.Info( - "fast sync is finished", - zap.String("pk", fp.GetBtcPkHex()), - zap.Uint64("synced_height", res.SyncedHeight), - zap.Uint64("last_processed_height", res.LastProcessedHeight), - ) - - // inform the poller to skip to the next block of the last - // processed one - err := fp.poller.SkipToHeight(fp.GetLastProcessedHeight() + 1) - if err != nil { - fp.logger.Debug( - "failed to skip heights from the poller", - zap.Error(err), - ) - } - } - case <-fp.quit: - fp.logger.Info("the finality signature submission loop is closing") - return - default: + case <-time.After(fp.cfg.SignatureSubmissionInterval): pollerBlocks := fp.getAllBlocksFromChan() if len(pollerBlocks) == 0 { continue @@ -273,7 +207,7 @@ func (fp *FinalityProviderInstance) finalitySigSubmissionLoop() { zap.Uint64("start_height", pollerBlocks[0].Height), zap.Uint64("end_height", targetHeight), ) - res, err := fp.retrySubmitFinalitySignatureUntilBlocksFinalized(pollerBlocks) + res, err := fp.retrySubmitSigsUntilFinalized(pollerBlocks) if err != nil { fp.metrics.IncrementFpTotalFailedVotes(fp.GetBtcPkHex()) if !errors.Is(err, ErrFinalityProviderShutDown) { @@ -295,6 +229,10 @@ func (fp *FinalityProviderInstance) finalitySigSubmissionLoop() { zap.Uint64("end_height", targetHeight), zap.String("tx_hash", res.TxHash), ) + + case <-fp.quit: + fp.logger.Info("the finality signature submission loop is closing") + return } } } @@ -304,6 +242,7 @@ func (fp *FinalityProviderInstance) getAllBlocksFromChan() []*types.BlockInfo { for { select { case b := <-fp.poller.GetBlockInfoChan(): + // TODO: in cases of catching up, this could issue frequent RPC calls shouldProcess, err := fp.shouldProcessBlock(b) if err != nil { if !errors.Is(err, ErrFinalityProviderShutDown) { @@ -314,6 +253,12 @@ func (fp *FinalityProviderInstance) getAllBlocksFromChan() []*types.BlockInfo { if shouldProcess { pollerBlocks = append(pollerBlocks, b) } + if len(pollerBlocks) == int(fp.cfg.BatchSubmissionSize) { + return pollerBlocks + } + case <-fp.quit: + fp.logger.Info("the get all blocks loop is closing") + return nil default: return pollerBlocks } @@ -322,9 +267,10 @@ func (fp *FinalityProviderInstance) getAllBlocksFromChan() []*types.BlockInfo { func (fp *FinalityProviderInstance) shouldProcessBlock(b *types.BlockInfo) (bool, error) { // check whether the block has been processed before - if fp.hasProcessed(b.Height) { + if fp.hasProcessed(b) { return false, nil } + // check whether the finality provider has voting power hasVp, err := fp.hasVotingPower(b.Height) if err != nil { @@ -337,6 +283,7 @@ func (fp *FinalityProviderInstance) shouldProcessBlock(b *types.BlockInfo) (bool fp.metrics.IncrementFpTotalBlocksWithoutVotingPower(fp.GetBtcPkHex()) return false, nil } + return true, nil } @@ -406,96 +353,12 @@ func (fp *FinalityProviderInstance) randomnessCommitmentLoop(startHeight uint64) } } -func (fp *FinalityProviderInstance) checkLaggingLoop() { - defer fp.wg.Done() - - if fp.cfg.FastSyncInterval == 0 { - fp.logger.Info("the fast sync is disabled") - return - } - - fastSyncTicker := time.NewTicker(fp.cfg.FastSyncInterval) - defer fastSyncTicker.Stop() - - for { - select { - case <-fastSyncTicker.C: - if fp.isLagging.Load() { - // we are in fast sync mode, skip do not do checks - continue - } - - latestBlockHeight, err := fp.getLatestBlockHeightWithRetry() - if err != nil { - fp.logger.Debug( - "failed to get the latest block of the consumer chain", - zap.String("pk", fp.GetBtcPkHex()), - zap.Error(err), - ) - continue - } - - if fp.checkLagging(latestBlockHeight) { - fp.isLagging.Store(true) - fp.laggingTargetChan <- latestBlockHeight - } - case <-fp.quit: - fp.logger.Debug("the fast sync loop is closing") - return - } - } -} - -func (fp *FinalityProviderInstance) tryFastSync(targetBlockHeight uint64) (*FastSyncResult, error) { - if fp.inSync.Load() { - return nil, fmt.Errorf("the finality-provider %s is already in sync", fp.GetBtcPkHex()) - } - - // get the last finalized height - lastFinalizedBlock, err := fp.latestFinalizedBlockWithRetry() - if err != nil { - return nil, err - } - if lastFinalizedBlock == nil { - fp.logger.Debug( - "no finalized blocks yet, no need to catch up", - zap.String("pk", fp.GetBtcPkHex()), - zap.Uint64("height", targetBlockHeight), - ) - return nil, nil - } - - lastFinalizedHeight := lastFinalizedBlock.Height - lastProcessedHeight := fp.GetLastProcessedHeight() - - // get the startHeight from the maximum of the lastVotedHeight and - // the lastFinalizedHeight plus 1 - var startHeight uint64 - if lastFinalizedHeight < lastProcessedHeight { - startHeight = lastProcessedHeight + 1 - } else { - startHeight = lastFinalizedHeight + 1 - } - - if startHeight > targetBlockHeight { - return nil, fmt.Errorf("the start height %v should not be higher than the current block %v", startHeight, targetBlockHeight) - } - - fp.logger.Debug("the finality-provider is entering fast sync", - zap.String("pk", fp.GetBtcPkHex()), - zap.Uint64("start_height", startHeight), - zap.Uint64("target_block_height", targetBlockHeight), - ) - - return fp.FastSync(startHeight, targetBlockHeight) -} - -func (fp *FinalityProviderInstance) hasProcessed(blockHeight uint64) bool { - if blockHeight <= fp.GetLastProcessedHeight() { +func (fp *FinalityProviderInstance) hasProcessed(b *types.BlockInfo) bool { + if b.Height <= fp.GetLastProcessedHeight() { fp.logger.Debug( "the block has been processed before, skip processing", zap.String("pk", fp.GetBtcPkHex()), - zap.Uint64("block_height", blockHeight), + zap.Uint64("block_height", b.Height), zap.Uint64("last_processed_height", fp.GetLastProcessedHeight()), ) return true @@ -530,73 +393,72 @@ func (fp *FinalityProviderInstance) reportCriticalErr(err error) { } } -// checkLagging returns true if the lasted voted height is behind by a configured gap -func (fp *FinalityProviderInstance) checkLagging(currentBlockHeight uint64) bool { - return currentBlockHeight >= fp.GetLastProcessedHeight()+fp.cfg.FastSyncGap -} - -// retrySubmitFinalitySignatureUntilBlocksFinalized periodically tries to submit finality signature until success or the block is finalized +// retrySubmitSigsUntilFinalized periodically tries to submit finality signature until success or the block is finalized // error will be returned if maximum retries have been reached or the query to the consumer chain fails -func (fp *FinalityProviderInstance) retrySubmitFinalitySignatureUntilBlocksFinalized(targetBlocks []*types.BlockInfo) (*types.TxResponse, error) { +func (fp *FinalityProviderInstance) retrySubmitSigsUntilFinalized(targetBlocks []*types.BlockInfo) (*types.TxResponse, error) { + if len(targetBlocks) == 0 { + return nil, fmt.Errorf("cannot send signatures for empty blocks") + } + var failedCycles uint32 targetHeight := targetBlocks[len(targetBlocks)-1].Height + // we break the for loop if the block is finalized or the signature is successfully submitted // error will be returned if maximum retries have been reached or the query to the consumer chain fails for { - // error will be returned if max retries have been reached - var res *types.TxResponse - var err error - if len(targetBlocks) == 1 { - res, err = fp.SubmitFinalitySignature(targetBlocks[0]) - } else { - res, err = fp.SubmitBatchFinalitySignatures(targetBlocks) - } - if err != nil { - fp.logger.Debug( - "failed to submit finality signature to the consumer chain", - zap.String("pk", fp.GetBtcPkHex()), - zap.Uint32("current_failures", failedCycles), - zap.Uint64("target_start_height", targetBlocks[0].Height), - zap.Uint64("target_end_height", targetHeight), - zap.Error(err), - ) - - if fpcc.IsUnrecoverable(err) { - return nil, err - } - - if fpcc.IsExpected(err) { - return nil, nil - } - - failedCycles += 1 - if failedCycles > uint32(fp.cfg.MaxSubmissionRetries) { - return nil, fmt.Errorf("reached max failed cycles with err: %w", err) - } - } else { - // the signature has been successfully submitted - return res, nil - } select { case <-time.After(fp.cfg.SubmissionRetryInterval): - // periodically query the index block to be later checked whether it is Finalized - finalized, err := fp.consumerCon.QueryIsBlockFinalized(targetHeight) + // error will be returned if max retries have been reached + var res *types.TxResponse + var err error + res, err = fp.SubmitBatchFinalitySignatures(targetBlocks) if err != nil { - return nil, fmt.Errorf("failed to query block finalization at height %v: %w", targetHeight, err) - } - if finalized { fp.logger.Debug( - "the block is already finalized, skip submission", + "failed to submit finality signature to the consumer chain", zap.String("pk", fp.GetBtcPkHex()), - zap.Uint64("target_height", targetHeight), + zap.Uint32("current_failures", failedCycles), + zap.Uint64("target_start_height", targetBlocks[0].Height), + zap.Uint64("target_end_height", targetHeight), + zap.Error(err), ) - // TODO: returning nil here is to safely break the loop - // the error still exists - return nil, nil + + if fpcc.IsUnrecoverable(err) { + return nil, err + } + + if fpcc.IsExpected(err) { + return nil, nil + } + + failedCycles += 1 + if failedCycles > uint32(fp.cfg.MaxSubmissionRetries) { + return nil, fmt.Errorf("reached max failed cycles with err: %w", err) + } + } else { + // the signature has been successfully submitted + return res, nil + } + select { + case <-time.After(fp.cfg.SubmissionRetryInterval): + // periodically query the index block to be later checked whether it is Finalized + finalized, err := fp.consumerCon.QueryIsBlockFinalized(targetHeight) + if err != nil { + return nil, fmt.Errorf("failed to query block finalization at height %v: %w", targetHeight, err) + } + if finalized { + fp.logger.Debug( + "the block is already finalized, skip submission", + zap.String("pk", fp.GetBtcPkHex()), + zap.Uint64("target_height", targetHeight), + ) + // TODO: returning nil here is to safely break the loop + // the error still exists + return nil, nil + } + case <-fp.quit: + fp.logger.Debug("the finality-provider instance is closing", zap.String("pk", fp.GetBtcPkHex())) + return nil, ErrFinalityProviderShutDown } - case <-fp.quit: - fp.logger.Debug("the finality-provider instance is closing", zap.String("pk", fp.GetBtcPkHex())) - return nil, ErrFinalityProviderShutDown } } } @@ -722,7 +584,7 @@ func (fp *FinalityProviderInstance) CommitPubRand(targetBlockHeight uint64) (*ty if lastCommittedHeight == uint64(0) { // the finality-provider has never submitted public rand before startHeight = targetBlockHeight - } else if lastCommittedHeight < fp.cfg.MinRandHeightGap+targetBlockHeight { + } else if lastCommittedHeight < uint64(fp.cfg.MinRandHeightGap)+targetBlockHeight { // (should not use subtraction because they are in the type of uint64) // we are running out of the randomness startHeight = lastCommittedHeight + 1 @@ -745,7 +607,7 @@ func (fp *FinalityProviderInstance) commitPubRandPairs(startHeight uint64) (*typ // NOTE: currently, calling this will create and save a list of randomness // in case of failure, randomness that has been created will be overwritten // for safety reason as the same randomness must not be used twice - pubRandList, err := fp.GetPubRandList(startHeight, fp.cfg.NumPubRand) + pubRandList, err := fp.GetPubRandList(startHeight, uint64(fp.cfg.NumPubRand)) if err != nil { return nil, fmt.Errorf("failed to generate randomness: %w", err) } @@ -840,7 +702,7 @@ func (fp *FinalityProviderInstance) TestCommitPubRandWithStartHeight(startHeight if err != nil { return err } - lastCommittedHeight = startHeight + fp.cfg.NumPubRand - 1 + lastCommittedHeight = startHeight + uint64(fp.cfg.NumPubRand) - 1 startHeight = lastCommittedHeight + 1 fp.logger.Info("Committed pubrand to block height", zap.Uint64("height", lastCommittedHeight)) } @@ -851,44 +713,7 @@ func (fp *FinalityProviderInstance) TestCommitPubRandWithStartHeight(startHeight // SubmitFinalitySignature builds and sends a finality signature over the given block to the consumer chain func (fp *FinalityProviderInstance) SubmitFinalitySignature(b *types.BlockInfo) (*types.TxResponse, error) { - sig, err := fp.SignFinalitySig(b) - if err != nil { - return nil, err - } - - // get public randomness at the height - prList, err := fp.GetPubRandList(b.Height, 1) - if err != nil { - return nil, fmt.Errorf("failed to get public randomness list: %v", err) - } - pubRand := prList[0] - - // get inclusion proof - proofBytes, err := fp.pubRandState.GetPubRandProof(pubRand) - if err != nil { - return nil, fmt.Errorf( - "failed to get inclusion proof of public randomness %s for FP %s for block %d: %w", - pubRand.String(), - fp.btcPk.MarshalHex(), - b.Height, - err, - ) - } - - // send finality signature to the consumer chain - res, err := fp.consumerCon.SubmitFinalitySig(fp.GetBtcPk(), b, pubRand, proofBytes, sig.ToModNScalar()) - if err != nil { - return nil, fmt.Errorf("failed to send finality signature to the consumer chain: %w", err) - } - - // update DB - fp.MustUpdateStateAfterFinalitySigSubmission(b.Height) - - // update metrics - fp.metrics.RecordFpVoteTime(fp.GetBtcPkHex()) - fp.metrics.IncrementFpTotalVotedBlocks(fp.GetBtcPkHex()) - - return res, nil + return fp.SubmitBatchFinalitySignatures([]*types.BlockInfo{b}) } // SubmitBatchFinalitySignatures builds and sends a finality signature over the given block to the consumer chain @@ -934,7 +759,7 @@ func (fp *FinalityProviderInstance) SubmitBatchFinalitySignatures(blocks []*type } // TestSubmitFinalitySignatureAndExtractPrivKey is exposed for presentation/testing purpose to allow manual sending finality signature -// this API is the same as SubmitFinalitySignature except that we don't constraint the voting height and update status +// this API is the same as SubmitBatchFinalitySignatures except that we don't constraint the voting height and update status // Note: this should not be used in the submission loop func (fp *FinalityProviderInstance) TestSubmitFinalitySignatureAndExtractPrivKey(b *types.BlockInfo) (*types.TxResponse, *btcec.PrivateKey, error) { // get public randomness diff --git a/finality-provider/service/fp_instance_test.go b/finality-provider/service/fp_instance_test.go index d5dfb17b..3d1def64 100644 --- a/finality-provider/service/fp_instance_test.go +++ b/finality-provider/service/fp_instance_test.go @@ -11,6 +11,7 @@ import ( "go.uber.org/zap" "github.com/babylonlabs-io/babylon/testutil/datagen" + ccapi "github.com/babylonlabs-io/finality-provider/clientcontroller/api" "github.com/babylonlabs-io/finality-provider/eotsmanager" eotscfg "github.com/babylonlabs-io/finality-provider/eotsmanager/config" @@ -48,7 +49,7 @@ func FuzzCommitPubRandList(f *testing.F) { }) } -func FuzzSubmitFinalitySig(f *testing.F) { +func FuzzSubmitFinalitySigs(f *testing.F) { testutil.AddRandomSeedsToFuzzer(f, 10) f.Fuzz(func(t *testing.T, seed int64) { r := rand.New(rand.NewSource(seed)) @@ -87,9 +88,9 @@ func FuzzSubmitFinalitySig(f *testing.F) { } expectedTxHash := testutil.GenRandomHexStr(r, 32) mockConsumerController.EXPECT(). - SubmitFinalitySig(fpIns.GetBtcPk(), nextBlock, gomock.Any(), gomock.Any(), gomock.Any()). + SubmitBatchFinalitySigs(fpIns.GetBtcPk(), []*types.BlockInfo{nextBlock}, gomock.Any(), gomock.Any(), gomock.Any()). Return(&types.TxResponse{TxHash: expectedTxHash}, nil).AnyTimes() - providerRes, err := fpIns.SubmitFinalitySignature(nextBlock) + providerRes, err := fpIns.SubmitBatchFinalitySignatures([]*types.BlockInfo{nextBlock}) require.NoError(t, err) require.Equal(t, expectedTxHash, providerRes.TxHash) diff --git a/itest/babylon/babylon_e2e_test.go b/itest/babylon/babylon_e2e_test.go index 54278d4b..9ae630d4 100644 --- a/itest/babylon/babylon_e2e_test.go +++ b/itest/babylon/babylon_e2e_test.go @@ -9,10 +9,11 @@ import ( "time" "github.com/babylonlabs-io/babylon/testutil/datagen" - e2eutils "github.com/babylonlabs-io/finality-provider/itest" "github.com/btcsuite/btcd/btcec/v2" "github.com/stretchr/testify/require" + e2eutils "github.com/babylonlabs-io/finality-provider/itest" + "github.com/babylonlabs-io/finality-provider/finality-provider/proto" "github.com/babylonlabs-io/finality-provider/types" ) @@ -52,7 +53,6 @@ func TestFinalityProviderLifeCycle(t *testing.T) { // stop the FP for several blocks and disable fast sync, and then restart FP // finality signature submission should get into the default case var n uint = 3 - tm.FpConfig.FastSyncInterval = 0 // finality signature submission would take about 5 seconds // set the poll interval to 2 seconds to make sure the poller channel has multiple blocks tm.FpConfig.PollerConfig.PollInterval = 2 * time.Second @@ -159,7 +159,6 @@ func TestFastSync(t *testing.T) { var n uint = 3 // stop the finality-provider for a few blocks then restart to trigger the fast sync - tm.FpConfig.FastSyncGap = uint64(n) tm.StopAndRestartFpAfterNBlocks(t, n, fpIns) // check there are n+1 blocks finalized