From 7896c7aad56c0c970163120240e17852abe6139d Mon Sep 17 00:00:00 2001 From: Lukasz Zimnoch Date: Wed, 7 Feb 2024 19:03:02 +0100 Subject: [PATCH 1/2] Single-shot DKG The currently used DKG retry mechanism based on random exclusion turned out to be ineffective for a higher number of participating operators. Such retries have a very small chance for success and produce a lot of unnecessary network traffic that consumes bandwidth and CPU excessively. Here we aim to improve the situation. First, we are making DKG a single-shot process which fails fast if the result cannot be produced during the first attempt. Second, we are doubling down the announcement period to maximize participation chance for all selected operators, even those being at the edge of the network. Last but not least, we are reducing the submission delay that is preserved between operators attempting to submit the final result on-chain. All those changes combined allow to achieve shorter DKG iterations that can be timed out quicker. This way, we will be able to repeat DKG more often, with different operator sets. --- pkg/tbtc/dkg.go | 7 ++- pkg/tbtc/dkg_loop.go | 13 ++++- pkg/tbtc/dkg_loop_test.go | 54 ++++++++++++++++----- solidity/ecdsa/contracts/WalletRegistry.sol | 8 +-- 4 files changed, 64 insertions(+), 18 deletions(-) diff --git a/pkg/tbtc/dkg.go b/pkg/tbtc/dkg.go index 162a01269e..bfd0cd85a5 100644 --- a/pkg/tbtc/dkg.go +++ b/pkg/tbtc/dkg.go @@ -28,7 +28,7 @@ const ( // is used to calculate the submission delay period that should be respected // by the given member to avoid all members submitting the same DKG result // at the same time. - dkgResultSubmissionDelayStepBlocks = 15 + dkgResultSubmissionDelayStepBlocks = 3 // dkgResultApprovalDelayStepBlocks determines the delay step in blocks // that is used to calculate the approval delay period that should be // respected by the given member to avoid all members approving the same @@ -39,6 +39,10 @@ const ( // submission. Once the period elapses, the DKG state is checked to confirm // the challenge was accepted successfully. dkgResultChallengeConfirmationBlocks = 20 + // dkgAttemptsLimit determines the maximum number of attempts to execute + // the DKG protocol. If the limit is reached, the protocol execution is + // aborted. + dkgAttemptsLimit = 1 ) // dkgExecutor is a component responsible for the full execution of ECDSA @@ -317,6 +321,7 @@ func (de *dkgExecutor) generateSigningGroup( groupSelectionResult.OperatorsAddresses, de.groupParameters, announcer, + dkgAttemptsLimit, ) result, err := retryLoop.start( diff --git a/pkg/tbtc/dkg_loop.go b/pkg/tbtc/dkg_loop.go index b66f67477e..082f7310b0 100644 --- a/pkg/tbtc/dkg_loop.go +++ b/pkg/tbtc/dkg_loop.go @@ -23,7 +23,7 @@ const ( // dkgAttemptAnnouncementActiveBlocks determines the duration of the // announcement phase that is performed at the beginning of each DKG // attempt. - dkgAttemptAnnouncementActiveBlocks = 5 + dkgAttemptAnnouncementActiveBlocks = 10 // dkgAttemptProtocolBlocks determines the maximum block duration of the // actual protocol computations. dkgAttemptMaximumProtocolBlocks = 200 @@ -72,6 +72,8 @@ type dkgRetryLoop struct { // Used for the random operator selection. It never changes. attemptSeed int64 attemptDelayBlocks uint64 + + attemptsLimit uint } func newDkgRetryLoop( @@ -82,6 +84,7 @@ func newDkgRetryLoop( selectedOperators chain.Addresses, groupParameters *GroupParameters, announcer dkgAnnouncer, + attemptsLimit uint, ) *dkgRetryLoop { // Compute the 8-byte seed needed for the random retry algorithm. We take // the first 8 bytes of the hash of the DKG seed. This allows us to not @@ -101,6 +104,7 @@ func newDkgRetryLoop( attemptStartBlock: initialStartBlock, attemptSeed: attemptSeed, attemptDelayBlocks: 5, + attemptsLimit: attemptsLimit, } } @@ -126,6 +130,13 @@ func (drl *dkgRetryLoop) start( for { drl.attemptCounter++ + if drl.attemptsLimit != 0 && drl.attemptCounter > drl.attemptsLimit { + return nil, fmt.Errorf( + "reached the limit of attempts [%v]", + drl.attemptsLimit, + ) + } + // In order to start attempts >1 in the right place, we need to // determine how many blocks were taken by previous attempts. We assume // the worst case that each attempt failed at the end of the DKG diff --git a/pkg/tbtc/dkg_loop_test.go b/pkg/tbtc/dkg_loop_test.go index 61d59b691a..779b3ac184 100644 --- a/pkg/tbtc/dkg_loop_test.go +++ b/pkg/tbtc/dkg_loop_test.go @@ -60,6 +60,7 @@ func TestDkgRetryLoop(t *testing.T) { ctxFn func() (context.Context, context.CancelFunc) incomingAnnouncementsFn func(sessionID string) ([]group.MemberIndex, error) dkgAttemptFn dkgAttemptFn + attemptsLimit uint expectedErr error expectedResult *dkg.Result expectedLastAttempt *dkgAttemptParams @@ -75,12 +76,13 @@ func TestDkgRetryLoop(t *testing.T) { dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) { return testResult, nil }, + attemptsLimit: 0, // no limit expectedErr: nil, expectedResult: testResult, expectedLastAttempt: &dkgAttemptParams{ number: 1, - startBlock: 206, - timeoutBlock: 406, // start block + 200 + startBlock: 211, + timeoutBlock: 411, // start block + 200 excludedMembersIndexes: []group.MemberIndex{}, }, }, @@ -96,6 +98,7 @@ func TestDkgRetryLoop(t *testing.T) { dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) { return testResult, nil }, + attemptsLimit: 0, // no limit expectedErr: nil, expectedResult: testResult, // As only 8 members (group quorum) announced their readiness, @@ -103,8 +106,8 @@ func TestDkgRetryLoop(t *testing.T) { // Not ready members are excluded. expectedLastAttempt: &dkgAttemptParams{ number: 1, - startBlock: 206, - timeoutBlock: 406, // start block + 200 + startBlock: 211, + timeoutBlock: 411, // start block + 200 excludedMembersIndexes: []group.MemberIndex{9, 10}, }, }, @@ -124,14 +127,15 @@ func TestDkgRetryLoop(t *testing.T) { dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) { return testResult, nil }, + attemptsLimit: 0, // no limit expectedErr: nil, expectedResult: testResult, // First attempt fails because the group quorum did not announce // readiness. expectedLastAttempt: &dkgAttemptParams{ number: 2, - startBlock: 417, // 206 + 1 * (6 + 200 + 5) - timeoutBlock: 617, // start block + 200 + startBlock: 427, // 211 + 1 * (11 + 200 + 5) + timeoutBlock: 627, // start block + 200 excludedMembersIndexes: []group.MemberIndex{2, 5}, }, }, @@ -150,13 +154,14 @@ func TestDkgRetryLoop(t *testing.T) { dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) { return testResult, nil }, + attemptsLimit: 0, // no limit expectedErr: nil, expectedResult: testResult, // First attempt fails due to the announcer error. expectedLastAttempt: &dkgAttemptParams{ number: 2, - startBlock: 417, // 206 + 1 * (6 + 200 + 5) - timeoutBlock: 617, // start block + 200 + startBlock: 427, // 211 + 1 * (11 + 200 + 5) + timeoutBlock: 627, // start block + 200 excludedMembersIndexes: []group.MemberIndex{2, 5}, }, }, @@ -175,6 +180,7 @@ func TestDkgRetryLoop(t *testing.T) { return testResult, nil }, + attemptsLimit: 0, // no limit expectedErr: nil, expectedResult: testResult, // The DKG error occurs on attempt 1. For attempt 2, all members are @@ -183,8 +189,8 @@ func TestDkgRetryLoop(t *testing.T) { // given seed. expectedLastAttempt: &dkgAttemptParams{ number: 2, - startBlock: 417, // 206 + 1 * (6 + 200 + 5) - timeoutBlock: 617, // start block + 150 + startBlock: 427, // 211 + 1 * (11 + 200 + 5) + timeoutBlock: 627, // start block + 200 excludedMembersIndexes: []group.MemberIndex{2, 5}, }, }, @@ -203,6 +209,7 @@ func TestDkgRetryLoop(t *testing.T) { return testResult, nil }, + attemptsLimit: 0, // no limit expectedErr: nil, expectedResult: testResult, // Member 5 is the executing one. First attempt fails and is @@ -211,8 +218,8 @@ func TestDkgRetryLoop(t *testing.T) { // member 5 skips attempt 2 and succeeds on attempt 3. expectedLastAttempt: &dkgAttemptParams{ number: 3, - startBlock: 628, // 206 + 2 * (6 + 200 + 5) - timeoutBlock: 828, // start block + 200 + startBlock: 643, // 211 + 2 * (11 + 200 + 5) + timeoutBlock: 843, // start block + 200 excludedMembersIndexes: []group.MemberIndex{9}, }, }, @@ -230,10 +237,32 @@ func TestDkgRetryLoop(t *testing.T) { dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) { return nil, fmt.Errorf("invalid data") }, + attemptsLimit: 0, // no limit expectedErr: context.Canceled, expectedResult: nil, expectedLastAttempt: nil, }, + "attempts limit reached": { + memberIndex: 1, + ctxFn: func() (context.Context, context.CancelFunc) { + return context.WithTimeout(context.Background(), 10*time.Second) + }, + incomingAnnouncementsFn: func(sessionID string) ([]group.MemberIndex, error) { + // Force the first attempt's announcement failure. + if sessionID == fmt.Sprintf("%v-%v", seed, 1) { + return nil, fmt.Errorf("unexpected error") + } + + return membersIndexes, nil + }, + dkgAttemptFn: func(attempt *dkgAttemptParams) (*dkg.Result, error) { + return testResult, nil + }, + attemptsLimit: 1, + expectedErr: fmt.Errorf("reached the limit of attempts [1]"), + expectedResult: nil, + expectedLastAttempt: nil, + }, } for testName, test := range tests { @@ -251,6 +280,7 @@ func TestDkgRetryLoop(t *testing.T) { selectedOperators, groupParameters, announcer, + test.attemptsLimit, ) ctx, cancelCtx := test.ctxFn() diff --git a/solidity/ecdsa/contracts/WalletRegistry.sol b/solidity/ecdsa/contracts/WalletRegistry.sol index 94882c7de6..b24f2eeccd 100644 --- a/solidity/ecdsa/contracts/WalletRegistry.sol +++ b/solidity/ecdsa/contracts/WalletRegistry.sol @@ -323,9 +323,9 @@ contract WalletRegistry is // // DKG result submission timeout covers: // - 20 blocks required to confirm the DkgStarted event off-chain - // - 5 retries of the off-chain protocol that takes 211 blocks at most - // - 15 blocks to submit the result for each of the 100 members - // That gives: 20 + (5 * 211) + (15 * 100) = 2575 + // - 1 attempt of the off-chain protocol that takes 216 blocks at most + // - 3 blocks to submit the result for each of the 100 members + // That gives: 20 + (1 * 216) + (3 * 100) = 536 // // // The original DKG result submitter has 20 blocks to approve it before @@ -337,7 +337,7 @@ contract WalletRegistry is dkg.setSeedTimeout(11_520); dkg.setResultChallengePeriodLength(11_520); dkg.setResultChallengeExtraGas(50_000); - dkg.setResultSubmissionTimeout(2575); + dkg.setResultSubmissionTimeout(536); dkg.setSubmitterPrecedencePeriodLength(20); // Gas parameters were adjusted based on Ethereum state in April 2022. From 5049f77e68730688d687eb15ad4567ffdceeaea3 Mon Sep 17 00:00:00 2001 From: Lukasz Zimnoch Date: Wed, 7 Feb 2024 21:08:06 +0100 Subject: [PATCH 2/2] Use `BackoffRetransmissionStrategy` for DKG `resultSigningState` All DKG states use the `BackoffRetransmissionStrategy` strategy. There is no point to make an exception for the `resultSigningState`. This should reduce network load in case one of the participants fails at the end of the protocol. --- pkg/tecdsa/dkg/states.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/tecdsa/dkg/states.go b/pkg/tecdsa/dkg/states.go index e8648ed587..417ebb85b8 100644 --- a/pkg/tecdsa/dkg/states.go +++ b/pkg/tecdsa/dkg/states.go @@ -389,7 +389,11 @@ func (rss *resultSigningState) Initiate(ctx context.Context) error { return err } - if err := rss.channel.Send(ctx, message); err != nil { + if err := rss.channel.Send( + ctx, + message, + net.BackoffRetransmissionStrategy, + ); err != nil { return err }