From 8e01538b02d3e5bb87b7b98ecffedf5ccea18ef9 Mon Sep 17 00:00:00 2001 From: Balamurali Gopalswami Date: Mon, 18 Nov 2024 12:49:41 -0500 Subject: [PATCH 1/3] Fix reorg test flake --- integration-tests/ccip-tests/smoke/ccip_test.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/integration-tests/ccip-tests/smoke/ccip_test.go b/integration-tests/ccip-tests/smoke/ccip_test.go index cc8cc9b9a4..10039350fc 100644 --- a/integration-tests/ccip-tests/smoke/ccip_test.go +++ b/integration-tests/ccip-tests/smoke/ccip_test.go @@ -892,7 +892,7 @@ func TestSmokeCCIPReorgBelowFinality(t *testing.T) { // Test creates above finality reorg at destination and // expects ccip transactions in-flight and the one initiated after reorg -// doesn't go through and verifies every node is able to detect reorg. +// doesn't go through and verifies at least half of the nodes is able to detect reorg. // Note: LogPollInterval interval is set as 1s to detect the reorg immediately func TestSmokeCCIPReorgAboveFinalityAtDestination(t *testing.T) { t.Parallel() @@ -903,7 +903,7 @@ func TestSmokeCCIPReorgAboveFinalityAtDestination(t *testing.T) { // Test creates above finality reorg at destination and // expects ccip transactions in-flight doesn't go through, the transaction initiated after reorg -// shouldn't even get initiated and verifies every node is able to detect reorg. +// shouldn't even get initiated and verifies at least half of the nodes is able to detect reorg. // Note: LogPollInterval interval is set as 1s to detect the reorg immediately func TestSmokeCCIPReorgAboveFinalityAtSource(t *testing.T) { t.Parallel() @@ -1060,7 +1060,8 @@ func performAboveFinalityReorgAndValidate(t *testing.T, network string) { rs.RunReorg(rs.SrcClient, int(rs.Cfg.SrcFinalityDepth)+rs.Cfg.FinalityDelta, network, 2*time.Second) } clNodes := setUpOutput.Env.CLNodes - // assert every node is detecting the reorg (LogPollInterval is set as 1s for faster detection) + // assert at least half of the nodes is detecting the reorg (LogPollInterval is set as 1s for faster detection) + // expecting to detect reorg by every node is flake. So, checking with at least half of the nodes. nodesDetectedViolation := make(map[string]bool) assert.Eventually(t, func() bool { for _, node := range clNodes { @@ -1076,7 +1077,7 @@ func performAboveFinalityReorgAndValidate(t *testing.T, network string) { } } } - return len(nodesDetectedViolation) == len(clNodes) + return len(nodesDetectedViolation) >= len(clNodes)/2 }, 5*time.Minute, 5*time.Second, "Reorg above finality depth is not detected by every node") log.Debug().Interface("Nodes", nodesDetectedViolation).Msg("Violation detection details") // send another request and verify it fails From 475463768090415fb2117fee17aa90a785637e73 Mon Sep 17 00:00:00 2001 From: Balamurali Gopalswami Date: Mon, 18 Nov 2024 14:35:48 -0500 Subject: [PATCH 2/3] update error msg --- integration-tests/ccip-tests/smoke/ccip_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration-tests/ccip-tests/smoke/ccip_test.go b/integration-tests/ccip-tests/smoke/ccip_test.go index 10039350fc..4ec0ab6ce9 100644 --- a/integration-tests/ccip-tests/smoke/ccip_test.go +++ b/integration-tests/ccip-tests/smoke/ccip_test.go @@ -1078,7 +1078,7 @@ func performAboveFinalityReorgAndValidate(t *testing.T, network string) { } } return len(nodesDetectedViolation) >= len(clNodes)/2 - }, 5*time.Minute, 5*time.Second, "Reorg above finality depth is not detected by every node") + }, 5*time.Minute, 5*time.Second, "Reorg above finality depth is not detected by half the nodes") log.Debug().Interface("Nodes", nodesDetectedViolation).Msg("Violation detection details") // send another request and verify it fails err = lane.SendRequests(1, gasLimit) From 41baa6902d30b1accb171b2a8341283dce585e89 Mon Sep 17 00:00:00 2001 From: Balamurali Gopalswami Date: Tue, 19 Nov 2024 15:05:06 -0500 Subject: [PATCH 3/3] Review comments --- .../ccip-tests/smoke/ccip_test.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/integration-tests/ccip-tests/smoke/ccip_test.go b/integration-tests/ccip-tests/smoke/ccip_test.go index 4ec0ab6ce9..99c7a9012f 100644 --- a/integration-tests/ccip-tests/smoke/ccip_test.go +++ b/integration-tests/ccip-tests/smoke/ccip_test.go @@ -2,6 +2,7 @@ package smoke import ( "fmt" + "math" "math/big" "strings" "testing" @@ -892,7 +893,7 @@ func TestSmokeCCIPReorgBelowFinality(t *testing.T) { // Test creates above finality reorg at destination and // expects ccip transactions in-flight and the one initiated after reorg -// doesn't go through and verifies at least half of the nodes is able to detect reorg. +// doesn't go through and verifies f+1 nodes are able to detect reorg. // Note: LogPollInterval interval is set as 1s to detect the reorg immediately func TestSmokeCCIPReorgAboveFinalityAtDestination(t *testing.T) { t.Parallel() @@ -903,7 +904,7 @@ func TestSmokeCCIPReorgAboveFinalityAtDestination(t *testing.T) { // Test creates above finality reorg at destination and // expects ccip transactions in-flight doesn't go through, the transaction initiated after reorg -// shouldn't even get initiated and verifies at least half of the nodes is able to detect reorg. +// shouldn't even get initiated and verifies f+1 nodes are able to detect reorg. // Note: LogPollInterval interval is set as 1s to detect the reorg immediately func TestSmokeCCIPReorgAboveFinalityAtSource(t *testing.T) { t.Parallel() @@ -1059,12 +1060,13 @@ func performAboveFinalityReorgAndValidate(t *testing.T, network string) { logPollerName = fmt.Sprintf("EVM.%d.LogPoller", lane.SourceChain.GetChainID()) rs.RunReorg(rs.SrcClient, int(rs.Cfg.SrcFinalityDepth)+rs.Cfg.FinalityDelta, network, 2*time.Second) } - clNodes := setUpOutput.Env.CLNodes - // assert at least half of the nodes is detecting the reorg (LogPollInterval is set as 1s for faster detection) - // expecting to detect reorg by every node is flake. So, checking with at least half of the nodes. + // DON is 3F+1, finding f+1 from the given number of nodes in the environment + fPlus1Nodes := int(math.Ceil(float64(len(setUpOutput.Env.CLNodes)-1)/3)) + 1 + // assert at least f+1 nodes is detecting the reorg (LogPollInterval is set as 1s for faster detection) + // additional context: Commit requires 2f+1 observations, so f+1 nodes need to detect it in order to force the entire DON to stop processing messages. nodesDetectedViolation := make(map[string]bool) assert.Eventually(t, func() bool { - for _, node := range clNodes { + for _, node := range setUpOutput.Env.CLNodes { if _, ok := nodesDetectedViolation[node.ChainlinkClient.URL()]; ok { continue } @@ -1077,8 +1079,8 @@ func performAboveFinalityReorgAndValidate(t *testing.T, network string) { } } } - return len(nodesDetectedViolation) >= len(clNodes)/2 - }, 5*time.Minute, 5*time.Second, "Reorg above finality depth is not detected by half the nodes") + return len(nodesDetectedViolation) >= fPlus1Nodes + }, 3*time.Minute, 20*time.Second, "Reorg above finality depth is not detected by f+1 nodes") log.Debug().Interface("Nodes", nodesDetectedViolation).Msg("Violation detection details") // send another request and verify it fails err = lane.SendRequests(1, gasLimit)