diff --git a/core/services/ocr2/plugins/ocr2keeper/evm21/encoding/interface.go b/core/services/ocr2/plugins/ocr2keeper/evm21/encoding/interface.go index 4b559e9104e..e0c31f2beea 100644 --- a/core/services/ocr2/plugins/ocr2keeper/evm21/encoding/interface.go +++ b/core/services/ocr2/plugins/ocr2keeper/evm21/encoding/interface.go @@ -41,6 +41,7 @@ const ( MercuryUnmarshalError PipelineExecutionState = 6 InvalidMercuryRequest PipelineExecutionState = 7 InvalidMercuryResponse PipelineExecutionState = 8 // this will only happen if Mercury server sends bad responses + CheckBlockTooNew PipelineExecutionState = 9 ) type UpkeepInfo = iregistry21.KeeperRegistryBase21UpkeepInfo diff --git a/core/services/ocr2/plugins/ocr2keeper/evm21/registry_check_pipeline.go b/core/services/ocr2/plugins/ocr2keeper/evm21/registry_check_pipeline.go index 5cf2dfa0981..bf57ce496a3 100644 --- a/core/services/ocr2/plugins/ocr2keeper/evm21/registry_check_pipeline.go +++ b/core/services/ocr2/plugins/ocr2keeper/evm21/registry_check_pipeline.go @@ -15,12 +15,6 @@ import ( "github.com/smartcontractkit/chainlink/v2/core/services/ocr2/plugins/ocr2keeper/evm21/encoding" ) -const ( - // validCheckBlockRange decides the max distance between the check block and the current block - // allowed in checkPipeline - validCheckBlockRange = 128 -) - type checkResult struct { cr []ocr2keepers.CheckResult err error @@ -99,11 +93,11 @@ func (r *EvmRegistry) getBlockHash(blockNumber *big.Int) (common.Hash, error) { // verifyCheckBlock checks that the check block and hash are valid, returns the pipeline execution state and retryable func (r *EvmRegistry) verifyCheckBlock(ctx context.Context, checkBlock, upkeepId *big.Int, checkHash common.Hash) (state encoding.PipelineExecutionState, retryable bool) { - // verify check block number is not too old + // verify check block number is not in future (can happen when this node is lagging the other members in DON) latestBlock := r.bs.latestBlock.Load() - if int64(latestBlock.Number)-checkBlock.Int64() > validCheckBlockRange { - r.lggr.Warnf("latest block is %d, check block number %s is too old for upkeepId %s", r.bs.latestBlock.Load(), checkBlock, upkeepId) - return encoding.CheckBlockTooOld, false + if checkBlock.Int64() > int64(latestBlock.Number) { + r.lggr.Warnf("latest block is %d, check block number %s is in future for upkeepId %s", r.bs.latestBlock.Load(), checkBlock, upkeepId) + return encoding.CheckBlockTooNew, true // retryable since the block can be found in future } var h string @@ -240,10 +234,18 @@ func (r *EvmRegistry) checkUpkeeps(ctx context.Context, payloads []ocr2keepers.U for i, req := range checkReqs { index := indices[i] if req.Error != nil { - // individual upkeep failed in a batch call, retryable - r.lggr.Warnf("error encountered in check result for upkeepId %s: %s", results[index].UpkeepID.String(), req.Error) - results[index].Retryable = true - results[index].PipelineExecutionState = uint8(encoding.RpcFlakyFailure) + // primitive way of checking errors + if strings.Contains(req.Error.Error(), "header not found") { + // Check block not found in RPC, non-retryable error + r.lggr.Warnf("header not found error encountered in check result for upkeepId %s: %s", results[index].UpkeepID.String(), req.Error) + results[index].Retryable = false + results[index].PipelineExecutionState = uint8(encoding.CheckBlockTooOld) + } else { + // individual upkeep failed in a batch call, likely a flay RPC error, consider retryable + r.lggr.Warnf("rpc error encountered in check result for upkeepId %s: %s", results[index].UpkeepID.String(), req.Error) + results[index].Retryable = true + results[index].PipelineExecutionState = uint8(encoding.RpcFlakyFailure) + } } else { var err error results[index], err = r.packer.UnpackCheckResult(payloads[index], *checkResults[i]) diff --git a/core/services/ocr2/plugins/ocr2keeper/evm21/registry_check_pipeline_test.go b/core/services/ocr2/plugins/ocr2keeper/evm21/registry_check_pipeline_test.go index 69364396e8b..0cd1a11fcce 100644 --- a/core/services/ocr2/plugins/ocr2keeper/evm21/registry_check_pipeline_test.go +++ b/core/services/ocr2/plugins/ocr2keeper/evm21/registry_check_pipeline_test.go @@ -86,9 +86,9 @@ func TestRegistry_VerifyCheckBlock(t *testing.T) { makeEthCall bool }{ { - name: "check block number too told", + name: "check block number too new", checkBlock: big.NewInt(500), - latestBlock: ocr2keepers.BlockKey{Number: 800}, + latestBlock: ocr2keepers.BlockKey{Number: 400}, upkeepId: big.NewInt(12345), checkHash: common.HexToHash("0x5bff03de234fe771ac0d685f9ee0fb0b757ea02ec9e6f10e8e2ee806db1b6b83"), payload: ocr2keepers.UpkeepPayload{ @@ -96,7 +96,8 @@ func TestRegistry_VerifyCheckBlock(t *testing.T) { Trigger: ocr2keepers.NewTrigger(500, common.HexToHash("0x5bff03de234fe771ac0d685f9ee0fb0b757ea02ec9e6f10e8e2ee806db1b6b83")), WorkID: "work", }, - state: encoding.CheckBlockTooOld, + state: encoding.CheckBlockTooNew, + retryable: true, }, { name: "for an invalid check block number, if hash does not match the check hash, return CheckBlockInvalid", @@ -368,7 +369,7 @@ func TestRegistry_CheckUpkeeps(t *testing.T) { BlockNumber: 550, } - trigger0 := ocr2keepers.NewTrigger(150, common.HexToHash("0x1c77db0abe32327cf3ea9de2aadf79876f9e6b6dfcee9d4719a8a2dc8ca289d0")) + trigger0 := ocr2keepers.NewTrigger(590, common.HexToHash("0x1c77db0abe32327cf3ea9de2aadf79876f9e6b6dfcee9d4719a8a2dc8ca289d0")) trigger1 := ocr2keepers.NewLogTrigger(560, common.HexToHash("0x9840e5b709bfccf6a1b44f34c884bc39403f57923f3f5ead6243cc090546b857"), extension1) trigger2 := ocr2keepers.NewLogTrigger(570, common.HexToHash("0x1222d75217e2dd461cc77e4091c37abe76277430d97f1963a822b4e94ebb83fc"), extension2) @@ -412,8 +413,8 @@ func TestRegistry_CheckUpkeeps(t *testing.T) { latestBlock: ocr2keepers.BlockKey{Number: 580}, results: []ocr2keepers.CheckResult{ { - PipelineExecutionState: uint8(encoding.CheckBlockTooOld), - Retryable: false, + PipelineExecutionState: uint8(encoding.CheckBlockTooNew), + Retryable: true, Eligible: false, IneligibilityReason: 0, UpkeepID: uid0,