Skip to content

Commit

Permalink
Heartbeats: Changes to the inactivity claim trigger (#3816)
Browse files Browse the repository at this point in the history
Refs: #3796

Here we introduce the following changes to the heartbeat inactivity
claim trigger:
- So far, inactivity claims have been issued with an empty inactive
members set. This is a problem as the on-chain contract expects it to be
non-zero. We comply with this requirement here.
- Heartbeat signing errors are no longer counted as consecutive
heartbeat inactivity failures. If signing fails, the most probable cause
is the signing threshold that was not met. As an inactivity claim
requires that threshold to be met as well, there is no sense in
triggering that process in this case. Moreover, in case of a signing
error, the inactive members set cannot be determined which violates the
requirement from the first point
  • Loading branch information
tomaszslabon authored May 20, 2024
2 parents 56755f6 + 61376d2 commit 27a8b97
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 71 deletions.
73 changes: 45 additions & 28 deletions pkg/tbtc/heartbeat.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ const (
// of 25 blocks is roughly 5 minutes, assuming 12 seconds per block.
heartbeatTimeoutSafetyMarginBlocks = 25
// heartbeatSigningMinimumActiveOperators determines the minimum number of
// active operators during signing for a heartbeat to be considered valid.
heartbeatSigningMinimumActiveOperators = 70
// active members during signing for a heartbeat to be considered valid.
heartbeatSigningMinimumActiveMembers = 70
// heartbeatConsecutiveFailuresThreshold determines the number of consecutive
// heartbeat failures required to trigger inactivity operator notification.
heartbeatConsecutiveFailureThreshold = 3
Expand All @@ -60,7 +60,7 @@ type heartbeatSigningExecutor interface {
ctx context.Context,
message *big.Int,
startBlock uint64,
) (*tecdsa.Signature, uint32, uint64, error)
) (*tecdsa.Signature, *signingActivityReport, uint64, error)
}

// heartbeatInactivityClaimExecutor is an interface meant to decouple the
Expand Down Expand Up @@ -168,67 +168,84 @@ func (ha *heartbeatAction) execute() error {
)
defer cancelHeartbeatSigningCtx()

signature, activeOperatorsCount, _, err := ha.signingExecutor.sign(
signature, activityReport, _, err := ha.signingExecutor.sign(
heartbeatSigningCtx,
messageToSign,
ha.startBlock,
)
if err != nil {
// Do not count this error as heartbeat inactivity failure. If the
// process returned an error here, that likely means the group signing
// threshold was not met. In such a case, the inactivity claim does not
// have a chance for success anyway (it needs the group threshold to
// be met as well).
return fmt.Errorf("heartbeat signing process errored out: [%v]", err)
}

// If there was no error and the number of active operators during signing
// was enough, we can consider the heartbeat procedure as successful.
if err == nil && activeOperatorsCount >= heartbeatSigningMinimumActiveOperators {
// If the number of active members during signing was enough, we can
// consider the heartbeat procedure as successful.
activeMembersCount := len(activityReport.activeMembers)
if activeMembersCount >= heartbeatSigningMinimumActiveMembers {
ha.logger.Infof(
"successfully generated signature [%s] for heartbeat message [0x%x]",
"heartbeat generated signature [%s] for message [0x%x]",
signature,
ha.proposal.Message[:],
)

// Reset the counter for consecutive heartbeat failure.
// Reset the counter of consecutive heartbeat inactivity failures.
ha.failureCounter.reset(walletKey)

return nil
}

// If there was an error or the number of active operators during signing
// was not enough, we must consider the heartbeat procedure as a failure.
// If the number of active members during signing was not enough, we
// must consider the heartbeat procedure as an inactivity failure.
ha.logger.Warnf(
"heartbeat failed; [%d/%d] operators participated; the process "+
"returned [%v] as error",
activeOperatorsCount,
heartbeatSigningMinimumActiveOperators,
err,
"heartbeat generated signature but minimum activity "+
"threshold was not met ([%d/%d] members participated); "+
"counting it as inactivity failure",
activeMembersCount,
heartbeatSigningMinimumActiveMembers,
)

// Increment the heartbeat failure counter.
// Increment the heartbeat inactivity failure counter.
ha.failureCounter.increment(walletKey)

// If the number of consecutive heartbeat failures does not exceed the
// threshold do not notify about operator inactivity.
// If the number of consecutive heartbeat inactivity failures does not
// exceed the threshold, do not issue an inactivity claim yet.
if ha.failureCounter.get(walletKey) < heartbeatConsecutiveFailureThreshold {
ha.logger.Warnf(
"leaving without notifying about operator inactivity; current "+
"heartbeat failure count is [%d]",
"not issuing an inactivity claim yet; current consecutive"+
"heartbeat inactivity failure count is [%d/%d]",
ha.failureCounter.get(walletKey),
heartbeatConsecutiveFailureThreshold,
)
return nil
}

// This should not happen but check it just in case as inactivity claim
// requires a non-empty inactive members set.
if len(activityReport.inactiveMembers) == 0 {
return fmt.Errorf(
"inactivity claim aborted due to an undetermined set of inactive members",
)
}

heartbeatInactivityCtx, cancelHeartbeatInactivityCtx := withCancelOnBlock(
context.Background(),
ha.expiryBlock-heartbeatTimeoutSafetyMarginBlocks,
ha.waitForBlockFn,
)
defer cancelHeartbeatInactivityCtx()

// The value of consecutive heartbeat failures exceeds the threshold.
// Proceed with operator inactivity notification.
// The value of consecutive heartbeat inactivity failures exceeds the threshold.
// Proceed with operator inactivity claim.
err = ha.inactivityClaimExecutor.claimInactivity(
heartbeatInactivityCtx,
// Leave the list of inactive operators empty even if some operators
// were inactive during signing heartbeat. The inactive operators could
// simply be in the process of unstaking and therefore should not be
// punished.
[]group.MemberIndex{},
// It's safe to consider unstaking members as inactive members in the claim.
// Inactive members are set ineligible for on-chain rewards for a certain
// period of time. This is a desired outcome for unstaking members as well.
activityReport.inactiveMembers,
true,
messageToSign,
)
Expand Down
49 changes: 37 additions & 12 deletions pkg/tbtc/heartbeat_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/hex"
"fmt"
"math/big"
"reflect"
"testing"

"github.com/keep-network/keep-core/internal/testutils"
Expand Down Expand Up @@ -51,7 +52,7 @@ func TestHeartbeatAction_HappyPath(t *testing.T) {

// Set the active operators count to the minimum required value.
mockExecutor := &mockHeartbeatSigningExecutor{}
mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveOperators
mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers

inactivityClaimExecutor := &mockInactivityClaimExecutor{}

Expand Down Expand Up @@ -130,7 +131,7 @@ func TestHeartbeatAction_OperatorUnstaking(t *testing.T) {

// Set the active operators count to the minimum required value.
mockExecutor := &mockHeartbeatSigningExecutor{}
mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveOperators
mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers

inactivityClaimExecutor := &mockInactivityClaimExecutor{}

Expand Down Expand Up @@ -193,7 +194,7 @@ func TestHeartbeatAction_Failure_SigningError(t *testing.T) {

mockExecutor := &mockHeartbeatSigningExecutor{}
mockExecutor.shouldFail = true
mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveOperators
mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers

inactivityClaimExecutor := &mockInactivityClaimExecutor{}

Expand All @@ -217,14 +218,22 @@ func TestHeartbeatAction_Failure_SigningError(t *testing.T) {
// Do not expect the execution to result in an error. Signing error does not
// mean the procedure failure.
err = action.execute()
if err != nil {
t.Fatal(err)

expectedError := fmt.Errorf("heartbeat signing process errored out: [oofta]")
if !reflect.DeepEqual(expectedError, err) {
t.Errorf(
"unexpected error\n"+
"expected: %v\n"+
"actual: %v\n",
expectedError,
err,
)
}

testutils.AssertUintsEqual(
t,
"heartbeat failure count",
1,
0,
uint64(heartbeatFailureCounter.get(walletPublicKeyStr)),
)
testutils.AssertBigIntsEqual(
Expand Down Expand Up @@ -264,7 +273,7 @@ func TestHeartbeatAction_Failure_TooFewActiveOperators(t *testing.T) {

// Set the active operators count just below the required number.
mockExecutor := &mockHeartbeatSigningExecutor{}
mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveOperators - 1
mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers - 1

inactivityClaimExecutor := &mockInactivityClaimExecutor{}

Expand Down Expand Up @@ -344,7 +353,7 @@ func TestHeartbeatAction_Failure_CounterExceeded(t *testing.T) {
hostChain.setHeartbeatProposalValidationResult(proposal, true)

mockExecutor := &mockHeartbeatSigningExecutor{}
mockExecutor.shouldFail = true
mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers - 1

inactivityClaimExecutor := &mockInactivityClaimExecutor{}

Expand Down Expand Up @@ -424,7 +433,7 @@ func TestHeartbeatAction_Failure_InactivityExecutionFailure(t *testing.T) {
hostChain.setHeartbeatProposalValidationResult(proposal, true)

mockExecutor := &mockHeartbeatSigningExecutor{}
mockExecutor.shouldFail = true
mockExecutor.activeOperatorsCount = heartbeatSigningMinimumActiveMembers - 1

inactivityClaimExecutor := &mockInactivityClaimExecutor{}
inactivityClaimExecutor.shouldFail = true
Expand Down Expand Up @@ -603,15 +612,31 @@ func (mhse *mockHeartbeatSigningExecutor) sign(
ctx context.Context,
message *big.Int,
startBlock uint64,
) (*tecdsa.Signature, uint32, uint64, error) {
) (*tecdsa.Signature, *signingActivityReport, uint64, error) {
mhse.requestedMessage = message
mhse.requestedStartBlock = startBlock

if mhse.shouldFail {
return nil, 0, 0, fmt.Errorf("oofta")
return nil, nil, 0, fmt.Errorf("oofta")
}

activeMembers := make([]group.MemberIndex, 0)
inactiveMembers := make([]group.MemberIndex, 0)

for memberIndex := uint32(1); memberIndex <= 100; memberIndex++ {
if memberIndex <= mhse.activeOperatorsCount {
activeMembers = append(activeMembers, group.MemberIndex(memberIndex))
} else {
inactiveMembers = append(inactiveMembers, group.MemberIndex(memberIndex))
}
}

activityReport := &signingActivityReport{
activeMembers: activeMembers,
inactiveMembers: inactiveMembers,
}

return &tecdsa.Signature{}, mhse.activeOperatorsCount, startBlock + 1, nil
return &tecdsa.Signature{}, activityReport, startBlock + 1, nil
}

type mockInactivityClaimExecutor struct {
Expand Down
22 changes: 11 additions & 11 deletions pkg/tbtc/signing.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,17 +174,17 @@ func (se *signingExecutor) sign(
ctx context.Context,
message *big.Int,
startBlock uint64,
) (*tecdsa.Signature, uint32, uint64, error) {
) (*tecdsa.Signature, *signingActivityReport, uint64, error) {
if lockAcquired := se.lock.TryAcquire(1); !lockAcquired {
return nil, 0, 0, errSigningExecutorBusy
return nil, nil, 0, errSigningExecutorBusy
}
defer se.lock.Release(1)

wallet := se.wallet()

walletPublicKeyBytes, err := marshalPublicKey(wallet.publicKey)
if err != nil {
return nil, 0, 0, fmt.Errorf("cannot marshal wallet public key: [%v]", err)
return nil, nil, 0, fmt.Errorf("cannot marshal wallet public key: [%v]", err)
}

loopTimeoutBlock := startBlock +
Expand All @@ -198,9 +198,9 @@ func (se *signingExecutor) sign(
)

type signingOutcome struct {
signature *tecdsa.Signature
activeMembersCount uint32
endBlock uint64
signature *tecdsa.Signature
activityReport *signingActivityReport
endBlock uint64
}

wg := sync.WaitGroup{}
Expand Down Expand Up @@ -367,9 +367,9 @@ func (se *signingExecutor) sign(
)

signingOutcomeChan <- &signingOutcome{
signature: loopResult.result.Signature,
activeMembersCount: loopResult.activeMembersCount,
endBlock: loopResult.latestEndBlock,
signature: loopResult.result.Signature,
activityReport: loopResult.activityReport,
endBlock: loopResult.latestEndBlock,
}
}(currentSigner)
}
Expand All @@ -386,9 +386,9 @@ func (se *signingExecutor) sign(
// signer, that means all signers failed and have not produced a signature.
select {
case outcome := <-signingOutcomeChan:
return outcome.signature, outcome.activeMembersCount, outcome.endBlock, nil
return outcome.signature, outcome.activityReport, outcome.endBlock, nil
default:
return nil, 0, 0, fmt.Errorf("all signers failed")
return nil, nil, 0, fmt.Errorf("all signers failed")
}
}

Expand Down
20 changes: 16 additions & 4 deletions pkg/tbtc/signing_loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,13 +140,20 @@ type signingAttemptParams struct {
// signingAttemptFn represents a function performing a signing attempt.
type signingAttemptFn func(*signingAttemptParams) (*signing.Result, uint64, error)

// signingActivityReport holds information about the activity of the signing
// group members during the signing process.
type signingActivityReport struct {
activeMembers []group.MemberIndex
inactiveMembers []group.MemberIndex
}

// signingRetryLoopResult represents the result of the signing retry loop.
type signingRetryLoopResult struct {
// result is the outcome of the signing process.
result *signing.Result
// activeMembersCount is the number of members that participated in the
// signing process.
activeMembersCount uint32
// activityReport holds information about the activity of the signing
// group members during the signing process.
activityReport *signingActivityReport
// latestEndBlock is the block at which the slowest signer of the successful
// signing attempt completed signature computation. This block is also
// the common end block accepted by all other members of the signing group.
Expand Down Expand Up @@ -409,9 +416,14 @@ func (srl *signingRetryLoop) start(
continue
}

activityReport := &signingActivityReport{
activeMembers: readyMembersIndexes,
inactiveMembers: unreadyMembersIndexes,
}

return &signingRetryLoopResult{
result: result,
activeMembersCount: uint32(len(readyMembersIndexes)),
activityReport: activityReport,
latestEndBlock: latestEndBlock,
attemptTimeoutBlock: timeoutBlock,
}, nil
Expand Down
Loading

0 comments on commit 27a8b97

Please sign in to comment.