From 9f39f876a16b42bc4d39898b7982a0d22186bc06 Mon Sep 17 00:00:00 2001 From: Balamurali Gopalswami <167726375+b-gopalswami@users.noreply.github.com> Date: Wed, 20 Nov 2024 09:08:45 -0500 Subject: [PATCH] Fix reorg test flake (#1542) For some weird reasons, the health check call in nodes doesn't show finality violated error consistently across all the nodes. Due to which this test TestSmokeCCIPReorgAboveFinalityAtDestination/Above_finality_reorg_in_destination_chain flakes often in CI. Adjusting the expectation to have at least half of the nodes detects violation instead of every node. CI Failure examples: https://github.com/smartcontractkit/chainlink/actions/runs/11894867899/job/33143516553 https://github.com/smartcontractkit/chainlink/actions/runs/11894232425/job/33141414438?pr=15282 --- integration-tests/ccip-tests/smoke/ccip_test.go | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/integration-tests/ccip-tests/smoke/ccip_test.go b/integration-tests/ccip-tests/smoke/ccip_test.go index cc8cc9b9a4..99c7a9012f 100644 --- a/integration-tests/ccip-tests/smoke/ccip_test.go +++ b/integration-tests/ccip-tests/smoke/ccip_test.go @@ -2,6 +2,7 @@ package smoke import ( "fmt" + "math" "math/big" "strings" "testing" @@ -892,7 +893,7 @@ func TestSmokeCCIPReorgBelowFinality(t *testing.T) { // Test creates above finality reorg at destination and // expects ccip transactions in-flight and the one initiated after reorg -// doesn't go through and verifies every node is able to detect reorg. +// doesn't go through and verifies f+1 nodes are able to detect reorg. // Note: LogPollInterval interval is set as 1s to detect the reorg immediately func TestSmokeCCIPReorgAboveFinalityAtDestination(t *testing.T) { t.Parallel() @@ -903,7 +904,7 @@ func TestSmokeCCIPReorgAboveFinalityAtDestination(t *testing.T) { // Test creates above finality reorg at destination and // expects ccip transactions in-flight doesn't go through, the transaction initiated after reorg -// shouldn't even get initiated and verifies every node is able to detect reorg. +// shouldn't even get initiated and verifies f+1 nodes are able to detect reorg. // Note: LogPollInterval interval is set as 1s to detect the reorg immediately func TestSmokeCCIPReorgAboveFinalityAtSource(t *testing.T) { t.Parallel() @@ -1059,11 +1060,13 @@ func performAboveFinalityReorgAndValidate(t *testing.T, network string) { logPollerName = fmt.Sprintf("EVM.%d.LogPoller", lane.SourceChain.GetChainID()) rs.RunReorg(rs.SrcClient, int(rs.Cfg.SrcFinalityDepth)+rs.Cfg.FinalityDelta, network, 2*time.Second) } - clNodes := setUpOutput.Env.CLNodes - // assert every node is detecting the reorg (LogPollInterval is set as 1s for faster detection) + // DON is 3F+1, finding f+1 from the given number of nodes in the environment + fPlus1Nodes := int(math.Ceil(float64(len(setUpOutput.Env.CLNodes)-1)/3)) + 1 + // assert at least f+1 nodes is detecting the reorg (LogPollInterval is set as 1s for faster detection) + // additional context: Commit requires 2f+1 observations, so f+1 nodes need to detect it in order to force the entire DON to stop processing messages. nodesDetectedViolation := make(map[string]bool) assert.Eventually(t, func() bool { - for _, node := range clNodes { + for _, node := range setUpOutput.Env.CLNodes { if _, ok := nodesDetectedViolation[node.ChainlinkClient.URL()]; ok { continue } @@ -1076,8 +1079,8 @@ func performAboveFinalityReorgAndValidate(t *testing.T, network string) { } } } - return len(nodesDetectedViolation) == len(clNodes) - }, 5*time.Minute, 5*time.Second, "Reorg above finality depth is not detected by every node") + return len(nodesDetectedViolation) >= fPlus1Nodes + }, 3*time.Minute, 20*time.Second, "Reorg above finality depth is not detected by f+1 nodes") log.Debug().Interface("Nodes", nodesDetectedViolation).Msg("Violation detection details") // send another request and verify it fails err = lane.SendRequests(1, gasLimit)