diff --git a/pkg/tbtc/heartbeat.go b/pkg/tbtc/heartbeat.go index 770d055879..c86afd88db 100644 --- a/pkg/tbtc/heartbeat.go +++ b/pkg/tbtc/heartbeat.go @@ -34,8 +34,8 @@ const ( // of 25 blocks is roughly 5 minutes, assuming 12 seconds per block. heartbeatTimeoutSafetyMarginBlocks = 25 // heartbeatSigningMinimumActiveOperators determines the minimum number of - // active operators during signing for a heartbeat to be considered valid. - heartbeatSigningMinimumActiveOperators = 70 + // active members during signing for a heartbeat to be considered valid. + heartbeatSigningMinimumActiveMembers = 70 // heartbeatConsecutiveFailuresThreshold determines the number of consecutive // heartbeat failures required to trigger inactivity operator notification. heartbeatConsecutiveFailureThreshold = 3 @@ -60,7 +60,7 @@ type heartbeatSigningExecutor interface { ctx context.Context, message *big.Int, startBlock uint64, - ) (*tecdsa.Signature, uint32, uint64, error) + ) (*tecdsa.Signature, *signingActivityReport, uint64, error) } // heartbeatInactivityClaimExecutor is an interface meant to decouple the @@ -168,51 +168,69 @@ func (ha *heartbeatAction) execute() error { ) defer cancelHeartbeatSigningCtx() - signature, activeOperatorsCount, _, err := ha.signingExecutor.sign( + signature, activityReport, _, err := ha.signingExecutor.sign( heartbeatSigningCtx, messageToSign, ha.startBlock, ) + if err != nil { + // Do not count this error as heartbeat inactivity failure. If the + // process returned an error here, that likely means the group signing + // threshold was not met. In such a case, the inactivity claim does not + // have a chance for success anyway (it needs the group threshold to + // be met as well). + return fmt.Errorf("heartbeat signing process errored out: [%v]", err) + } - // If there was no error and the number of active operators during signing - // was enough, we can consider the heartbeat procedure as successful. - if err == nil && activeOperatorsCount >= heartbeatSigningMinimumActiveOperators { + // If the number of active members during signing was enough, we can + // consider the heartbeat procedure as successful. + activeMembersCount := len(activityReport.activeMembers) + if activeMembersCount >= heartbeatSigningMinimumActiveMembers { ha.logger.Infof( - "successfully generated signature [%s] for heartbeat message [0x%x]", + "heartbeat generated signature [%s] for message [0x%x]", signature, ha.proposal.Message[:], ) - // Reset the counter for consecutive heartbeat failure. + // Reset the counter of consecutive heartbeat inactivity failures. ha.failureCounter.reset(walletKey) return nil } - // If there was an error or the number of active operators during signing - // was not enough, we must consider the heartbeat procedure as a failure. + // If the number of active members during signing was not enough, we + // must consider the heartbeat procedure as an inactivity failure. ha.logger.Warnf( - "heartbeat failed; [%d/%d] operators participated; the process "+ - "returned [%v] as error", - activeOperatorsCount, - heartbeatSigningMinimumActiveOperators, - err, + "heartbeat generated signature but minimum activity "+ + "threshold was not met ([%d/%d] members participated); "+ + "counting it as inactivity failure", + activeMembersCount, + heartbeatSigningMinimumActiveMembers, ) - // Increment the heartbeat failure counter. + // Increment the heartbeat inactivity failure counter. ha.failureCounter.increment(walletKey) - // If the number of consecutive heartbeat failures does not exceed the - // threshold do not notify about operator inactivity. + // If the number of consecutive heartbeat inactivity failures does not + // exceed the threshold, do not issue an inactivity claim yet. if ha.failureCounter.get(walletKey) < heartbeatConsecutiveFailureThreshold { ha.logger.Warnf( - "leaving without notifying about operator inactivity; current "+ - "heartbeat failure count is [%d]", + "not issuing an inactivity claim yet; current consecutive"+ + "heartbeat inactivity failure count is [%d/%d]", ha.failureCounter.get(walletKey), + heartbeatConsecutiveFailureThreshold, ) return nil } + // This should not happen but check it just in case as inactivity claim + // requires a non-empty inactive members set. + if len(activityReport.inactiveMembers) == 0 { + return fmt.Errorf( + "inactivity claim aborted due to an undetermined set of inactive members", + ) + } + heartbeatInactivityCtx, cancelHeartbeatInactivityCtx := withCancelOnBlock( context.Background(), ha.expiryBlock-heartbeatTimeoutSafetyMarginBlocks, @@ -220,15 +238,14 @@ func (ha *heartbeatAction) execute() error { ) defer cancelHeartbeatInactivityCtx() - // The value of consecutive heartbeat failures exceeds the threshold. - // Proceed with operator inactivity notification. + // The value of consecutive heartbeat inactivity failures exceeds the threshold. + // Proceed with operator inactivity claim. err = ha.inactivityClaimExecutor.claimInactivity( heartbeatInactivityCtx, - // Leave the list of inactive operators empty even if some operators - // were inactive during signing heartbeat. The inactive operators could - // simply be in the process of unstaking and therefore should not be - // punished. - []group.MemberIndex{}, + // It's safe to consider unstaking members as inactive members in the claim. + // Inactive members are set ineligible for on-chain rewards for a certain + // period of time. This is a desired outcome for unstaking members as well. + activityReport.inactiveMembers, true, messageToSign, ) diff --git a/pkg/tbtc/heartbeat_test.go b/pkg/tbtc/heartbeat_test.go index 6946904201..c635659a08 100644 --- a/pkg/tbtc/heartbeat_test.go +++ b/pkg/tbtc/heartbeat_test.go @@ -6,6 +6,7 @@ import ( "encoding/hex" "fmt" "math/big" + "reflect" "testing" "github.com/keep-network/keep-core/internal/testutils" @@ -51,7 +52,7 @@ func TestHeartbeatAction_HappyPath(t *testing.T) { // Set the active operators count to the minimum required value. mockExecutor := &mockHeartbeatSigningExecutor{} - mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveOperators + mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers inactivityClaimExecutor := &mockInactivityClaimExecutor{} @@ -130,7 +131,7 @@ func TestHeartbeatAction_OperatorUnstaking(t *testing.T) { // Set the active operators count to the minimum required value. mockExecutor := &mockHeartbeatSigningExecutor{} - mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveOperators + mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers inactivityClaimExecutor := &mockInactivityClaimExecutor{} @@ -193,7 +194,7 @@ func TestHeartbeatAction_Failure_SigningError(t *testing.T) { mockExecutor := &mockHeartbeatSigningExecutor{} mockExecutor.shouldFail = true - mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveOperators + mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers inactivityClaimExecutor := &mockInactivityClaimExecutor{} @@ -217,14 +218,22 @@ func TestHeartbeatAction_Failure_SigningError(t *testing.T) { // Do not expect the execution to result in an error. Signing error does not // mean the procedure failure. err = action.execute() - if err != nil { - t.Fatal(err) + + expectedError := fmt.Errorf("heartbeat signing process errored out: [oofta]") + if !reflect.DeepEqual(expectedError, err) { + t.Errorf( + "unexpected error\n"+ + "expected: %v\n"+ + "actual: %v\n", + expectedError, + err, + ) } testutils.AssertUintsEqual( t, "heartbeat failure count", - 1, + 0, uint64(heartbeatFailureCounter.get(walletPublicKeyStr)), ) testutils.AssertBigIntsEqual( @@ -264,7 +273,7 @@ func TestHeartbeatAction_Failure_TooFewActiveOperators(t *testing.T) { // Set the active operators count just below the required number. mockExecutor := &mockHeartbeatSigningExecutor{} - mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveOperators - 1 + mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers - 1 inactivityClaimExecutor := &mockInactivityClaimExecutor{} @@ -344,7 +353,7 @@ func TestHeartbeatAction_Failure_CounterExceeded(t *testing.T) { hostChain.setHeartbeatProposalValidationResult(proposal, true) mockExecutor := &mockHeartbeatSigningExecutor{} - mockExecutor.shouldFail = true + mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers - 1 inactivityClaimExecutor := &mockInactivityClaimExecutor{} @@ -424,7 +433,7 @@ func TestHeartbeatAction_Failure_InactivityExecutionFailure(t *testing.T) { hostChain.setHeartbeatProposalValidationResult(proposal, true) mockExecutor := &mockHeartbeatSigningExecutor{} - mockExecutor.shouldFail = true + mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers - 1 inactivityClaimExecutor := &mockInactivityClaimExecutor{} inactivityClaimExecutor.shouldFail = true @@ -603,15 +612,31 @@ func (mhse *mockHeartbeatSigningExecutor) sign( ctx context.Context, message *big.Int, startBlock uint64, -) (*tecdsa.Signature, uint32, uint64, error) { +) (*tecdsa.Signature, *signingActivityReport, uint64, error) { mhse.requestedMessage = message mhse.requestedStartBlock = startBlock if mhse.shouldFail { - return nil, 0, 0, fmt.Errorf("oofta") + return nil, nil, 0, fmt.Errorf("oofta") + } + + activeMembers := make([]group.MemberIndex, 0) + inactiveMembers := make([]group.MemberIndex, 0) + + for memberIndex := uint32(1); memberIndex <= 100; memberIndex++ { + if memberIndex <= mhse.activeOperatorsCount { + activeMembers = append(activeMembers, group.MemberIndex(memberIndex)) + } else { + inactiveMembers = append(inactiveMembers, group.MemberIndex(memberIndex)) + } + } + + activityReport := &signingActivityReport{ + activeMembers: activeMembers, + inactiveMembers: inactiveMembers, } - return &tecdsa.Signature{}, mhse.activeOperatorsCount, startBlock + 1, nil + return &tecdsa.Signature{}, activityReport, startBlock + 1, nil } type mockInactivityClaimExecutor struct { diff --git a/pkg/tbtc/signing.go b/pkg/tbtc/signing.go index b4ab2b069e..370e8df583 100644 --- a/pkg/tbtc/signing.go +++ b/pkg/tbtc/signing.go @@ -174,9 +174,9 @@ func (se *signingExecutor) sign( ctx context.Context, message *big.Int, startBlock uint64, -) (*tecdsa.Signature, uint32, uint64, error) { +) (*tecdsa.Signature, *signingActivityReport, uint64, error) { if lockAcquired := se.lock.TryAcquire(1); !lockAcquired { - return nil, 0, 0, errSigningExecutorBusy + return nil, nil, 0, errSigningExecutorBusy } defer se.lock.Release(1) @@ -184,7 +184,7 @@ func (se *signingExecutor) sign( walletPublicKeyBytes, err := marshalPublicKey(wallet.publicKey) if err != nil { - return nil, 0, 0, fmt.Errorf("cannot marshal wallet public key: [%v]", err) + return nil, nil, 0, fmt.Errorf("cannot marshal wallet public key: [%v]", err) } loopTimeoutBlock := startBlock + @@ -198,9 +198,9 @@ func (se *signingExecutor) sign( ) type signingOutcome struct { - signature *tecdsa.Signature - activeMembersCount uint32 - endBlock uint64 + signature *tecdsa.Signature + activityReport *signingActivityReport + endBlock uint64 } wg := sync.WaitGroup{} @@ -367,9 +367,9 @@ func (se *signingExecutor) sign( ) signingOutcomeChan <- &signingOutcome{ - signature: loopResult.result.Signature, - activeMembersCount: loopResult.activeMembersCount, - endBlock: loopResult.latestEndBlock, + signature: loopResult.result.Signature, + activityReport: loopResult.activityReport, + endBlock: loopResult.latestEndBlock, } }(currentSigner) } @@ -386,9 +386,9 @@ func (se *signingExecutor) sign( // signer, that means all signers failed and have not produced a signature. select { case outcome := <-signingOutcomeChan: - return outcome.signature, outcome.activeMembersCount, outcome.endBlock, nil + return outcome.signature, outcome.activityReport, outcome.endBlock, nil default: - return nil, 0, 0, fmt.Errorf("all signers failed") + return nil, nil, 0, fmt.Errorf("all signers failed") } } diff --git a/pkg/tbtc/signing_loop.go b/pkg/tbtc/signing_loop.go index 48f881df03..7e787f1975 100644 --- a/pkg/tbtc/signing_loop.go +++ b/pkg/tbtc/signing_loop.go @@ -140,13 +140,20 @@ type signingAttemptParams struct { // signingAttemptFn represents a function performing a signing attempt. type signingAttemptFn func(*signingAttemptParams) (*signing.Result, uint64, error) +// signingActivityReport holds information about the activity of the signing +// group members during the signing process. +type signingActivityReport struct { + activeMembers []group.MemberIndex + inactiveMembers []group.MemberIndex +} + // signingRetryLoopResult represents the result of the signing retry loop. type signingRetryLoopResult struct { // result is the outcome of the signing process. result *signing.Result - // activeMembersCount is the number of members that participated in the - // signing process. - activeMembersCount uint32 + // activityReport holds information about the activity of the signing + // group members during the signing process. + activityReport *signingActivityReport // latestEndBlock is the block at which the slowest signer of the successful // signing attempt completed signature computation. This block is also // the common end block accepted by all other members of the signing group. @@ -409,9 +416,14 @@ func (srl *signingRetryLoop) start( continue } + activityReport := &signingActivityReport{ + activeMembers: readyMembersIndexes, + inactiveMembers: unreadyMembersIndexes, + } + return &signingRetryLoopResult{ result: result, - activeMembersCount: uint32(len(readyMembersIndexes)), + activityReport: activityReport, latestEndBlock: latestEndBlock, attemptTimeoutBlock: timeoutBlock, }, nil diff --git a/pkg/tbtc/signing_loop_test.go b/pkg/tbtc/signing_loop_test.go index cf3b02e716..93397a9ef2 100644 --- a/pkg/tbtc/signing_loop_test.go +++ b/pkg/tbtc/signing_loop_test.go @@ -100,8 +100,11 @@ func TestSigningRetryLoop(t *testing.T) { }, expectedErr: nil, expectedResult: &signingRetryLoopResult{ - result: testResult, - activeMembersCount: 10, + result: testResult, + activityReport: &signingActivityReport{ + activeMembers: signingGroupMembersIndexes, + inactiveMembers: []group.MemberIndex{}, + }, latestEndBlock: 215, // the end block resolved by the done check phase attemptTimeoutBlock: 236, // start block of the first attempt + 30 }, @@ -151,8 +154,11 @@ func TestSigningRetryLoop(t *testing.T) { }, expectedErr: nil, expectedResult: &signingRetryLoopResult{ - result: testResult, - activeMembersCount: 6, + result: testResult, + activityReport: &signingActivityReport{ + activeMembers: []group.MemberIndex{1, 2, 3, 6, 7, 9}, + inactiveMembers: []group.MemberIndex{4, 5, 8, 10}, + }, latestEndBlock: 215, // the end block resolved by the done check phase attemptTimeoutBlock: 236, // start block of the first attempt + 30 }, @@ -206,8 +212,11 @@ func TestSigningRetryLoop(t *testing.T) { }, expectedErr: nil, expectedResult: &signingRetryLoopResult{ - result: testResult, - activeMembersCount: 10, + result: testResult, + activityReport: &signingActivityReport{ + activeMembers: signingGroupMembersIndexes, + inactiveMembers: []group.MemberIndex{}, + }, latestEndBlock: 260, // the end block resolved by the done check phase attemptTimeoutBlock: 277, // start block of the second attempt + 30 }, @@ -263,8 +272,11 @@ func TestSigningRetryLoop(t *testing.T) { }, expectedErr: nil, expectedResult: &signingRetryLoopResult{ - result: testResult, - activeMembersCount: 10, + result: testResult, + activityReport: &signingActivityReport{ + activeMembers: signingGroupMembersIndexes, + inactiveMembers: []group.MemberIndex{}, + }, latestEndBlock: 260, // the end block resolved by the done check phase attemptTimeoutBlock: 277, // start block of the second attempt + 30 }, @@ -320,8 +332,11 @@ func TestSigningRetryLoop(t *testing.T) { }, expectedErr: nil, expectedResult: &signingRetryLoopResult{ - result: testResult, - activeMembersCount: 10, + result: testResult, + activityReport: &signingActivityReport{ + activeMembers: signingGroupMembersIndexes, + inactiveMembers: []group.MemberIndex{}, + }, latestEndBlock: 260, // the end block resolved by the done check phase attemptTimeoutBlock: 277, // start block of the second attempt + 30 }, @@ -369,8 +384,11 @@ func TestSigningRetryLoop(t *testing.T) { expectedOutgoingDoneChecks: nil, expectedErr: nil, expectedResult: &signingRetryLoopResult{ - result: testResult, - activeMembersCount: 10, + result: testResult, + activityReport: &signingActivityReport{ + activeMembers: signingGroupMembersIndexes, + inactiveMembers: []group.MemberIndex{}, + }, latestEndBlock: 260, // the end block resolved by the done check phase attemptTimeoutBlock: 277, // start block of the second attempt + 30 }, @@ -441,8 +459,11 @@ func TestSigningRetryLoop(t *testing.T) { }, expectedErr: nil, expectedResult: &signingRetryLoopResult{ - result: testResult, - activeMembersCount: 10, + result: testResult, + activityReport: &signingActivityReport{ + activeMembers: signingGroupMembersIndexes, + inactiveMembers: []group.MemberIndex{}, + }, latestEndBlock: 260, // the end block resolved by the done check phase attemptTimeoutBlock: 277, // start block of the second attempt + 30 }, @@ -547,8 +568,11 @@ func TestSigningRetryLoop(t *testing.T) { }, expectedErr: nil, expectedResult: &signingRetryLoopResult{ - result: testResult, - activeMembersCount: 10, + result: testResult, + activityReport: &signingActivityReport{ + activeMembers: signingGroupMembersIndexes, + inactiveMembers: []group.MemberIndex{}, + }, latestEndBlock: 260, // the end block resolved by the done check phase attemptTimeoutBlock: 277, // start block of the second attempt + 30 },