From 34a5c3dd4793511a7e2a1fce15943a8357424690 Mon Sep 17 00:00:00 2001 From: Lei Date: Tue, 17 Oct 2023 07:31:07 -0700 Subject: [PATCH] chaos test for v21 (#10503) --- .../chaos/automation_chaos_test.go | 342 +++++++++--------- 1 file changed, 179 insertions(+), 163 deletions(-) diff --git a/integration-tests/chaos/automation_chaos_test.go b/integration-tests/chaos/automation_chaos_test.go index c292c130c5a..0c6b61e2969 100644 --- a/integration-tests/chaos/automation_chaos_test.go +++ b/integration-tests/chaos/automation_chaos_test.go @@ -111,174 +111,190 @@ func TestAutomationChaos(t *testing.T) { t.Parallel() l := logging.GetTestLogger(t) - testCases := map[string]struct { - networkChart environment.ConnectedChart - clChart environment.ConnectedChart - chaosFunc chaos.ManifestFunc - chaosProps *chaos.Props - }{ - // see ocr_chaos.test.go for comments - PodChaosFailMinorityNodes: { - ethereum.New(defaultEthereumSettings), - chainlink.New(0, defaultAutomationSettings), - chaos.NewFailPods, - &chaos.Props{ - LabelsSelector: &map[string]*string{ChaosGroupMinority: a.Str("1")}, - DurationStr: "1m", - }, - }, - PodChaosFailMajorityNodes: { - ethereum.New(defaultEthereumSettings), - chainlink.New(0, defaultAutomationSettings), - chaos.NewFailPods, - &chaos.Props{ - LabelsSelector: &map[string]*string{ChaosGroupMajority: a.Str("1")}, - DurationStr: "1m", - }, - }, - PodChaosFailMajorityDB: { - ethereum.New(defaultEthereumSettings), - chainlink.New(0, defaultAutomationSettings), - chaos.NewFailPods, - &chaos.Props{ - LabelsSelector: &map[string]*string{ChaosGroupMajority: a.Str("1")}, - DurationStr: "1m", - ContainerNames: &[]*string{a.Str("chainlink-db")}, - }, - }, - NetworkChaosFailMajorityNetwork: { - ethereum.New(defaultEthereumSettings), - chainlink.New(0, defaultAutomationSettings), - chaos.NewNetworkPartition, - &chaos.Props{ - FromLabels: &map[string]*string{ChaosGroupMajority: a.Str("1")}, - ToLabels: &map[string]*string{ChaosGroupMinority: a.Str("1")}, - DurationStr: "1m", - }, - }, - NetworkChaosFailBlockchainNode: { - ethereum.New(defaultEthereumSettings), - chainlink.New(0, defaultAutomationSettings), - chaos.NewNetworkPartition, - &chaos.Props{ - FromLabels: &map[string]*string{"app": a.Str("geth")}, - ToLabels: &map[string]*string{ChaosGroupMajorityPlus: a.Str("1")}, - DurationStr: "1m", - }, - }, + registryVersions := map[string]eth_contracts.KeeperRegistryVersion{ + "registry_2_0": eth_contracts.RegistryVersion_2_0, + "registry_2_1": eth_contracts.RegistryVersion_2_1, } - for n, tst := range testCases { - name := n - testCase := tst - t.Run(fmt.Sprintf("Automation_%s", name), func(t *testing.T) { + for name, registryVersion := range registryVersions { + t.Run(name, func(t *testing.T) { t.Parallel() - network := networks.SelectedNetwork // Need a new copy of the network for each test - - testEnvironment := environment. - New(&environment.Config{ - NamespacePrefix: fmt.Sprintf("chaos-automation-%s", name), - TTL: time.Hour * 1, - Test: t, - }). - AddHelm(testCase.networkChart). - AddHelm(testCase.clChart). - AddChart(blockscout.New(&blockscout.Props{ - Name: "geth-blockscout", - WsURL: network.URL, - HttpURL: network.HTTPURLs[0], - })) - err := testEnvironment.Run() - require.NoError(t, err, "Error setting up test environment") - if testEnvironment.WillUseRemoteRunner() { - return + + testCases := map[string]struct { + networkChart environment.ConnectedChart + clChart environment.ConnectedChart + chaosFunc chaos.ManifestFunc + chaosProps *chaos.Props + }{ + // see ocr_chaos.test.go for comments + PodChaosFailMinorityNodes: { + ethereum.New(defaultEthereumSettings), + chainlink.New(0, defaultAutomationSettings), + chaos.NewFailPods, + &chaos.Props{ + LabelsSelector: &map[string]*string{ChaosGroupMinority: a.Str("1")}, + DurationStr: "1m", + }, + }, + PodChaosFailMajorityNodes: { + ethereum.New(defaultEthereumSettings), + chainlink.New(0, defaultAutomationSettings), + chaos.NewFailPods, + &chaos.Props{ + LabelsSelector: &map[string]*string{ChaosGroupMajority: a.Str("1")}, + DurationStr: "1m", + }, + }, + PodChaosFailMajorityDB: { + ethereum.New(defaultEthereumSettings), + chainlink.New(0, defaultAutomationSettings), + chaos.NewFailPods, + &chaos.Props{ + LabelsSelector: &map[string]*string{ChaosGroupMajority: a.Str("1")}, + DurationStr: "1m", + ContainerNames: &[]*string{a.Str("chainlink-db")}, + }, + }, + NetworkChaosFailMajorityNetwork: { + ethereum.New(defaultEthereumSettings), + chainlink.New(0, defaultAutomationSettings), + chaos.NewNetworkPartition, + &chaos.Props{ + FromLabels: &map[string]*string{ChaosGroupMajority: a.Str("1")}, + ToLabels: &map[string]*string{ChaosGroupMinority: a.Str("1")}, + DurationStr: "1m", + }, + }, + NetworkChaosFailBlockchainNode: { + ethereum.New(defaultEthereumSettings), + chainlink.New(0, defaultAutomationSettings), + chaos.NewNetworkPartition, + &chaos.Props{ + FromLabels: &map[string]*string{"app": a.Str("geth")}, + ToLabels: &map[string]*string{ChaosGroupMajorityPlus: a.Str("1")}, + DurationStr: "1m", + }, + }, + } + + for n, tst := range testCases { + name := n + testCase := tst + t.Run(fmt.Sprintf("Automation_%s", name), func(t *testing.T) { + t.Parallel() + network := networks.SelectedNetwork // Need a new copy of the network for each test + + testEnvironment := environment. + New(&environment.Config{ + NamespacePrefix: fmt.Sprintf("chaos-automation-%s", name), + TTL: time.Hour * 1, + Test: t, + }). + AddHelm(testCase.networkChart). + AddHelm(testCase.clChart). + AddChart(blockscout.New(&blockscout.Props{ + Name: "geth-blockscout", + WsURL: network.URL, + HttpURL: network.HTTPURLs[0], + })) + err := testEnvironment.Run() + require.NoError(t, err, "Error setting up test environment") + if testEnvironment.WillUseRemoteRunner() { + return + } + + err = testEnvironment.Client.LabelChaosGroup(testEnvironment.Cfg.Namespace, "instance=node-", 1, 2, ChaosGroupMinority) + require.NoError(t, err) + err = testEnvironment.Client.LabelChaosGroup(testEnvironment.Cfg.Namespace, "instance=node-", 3, 5, ChaosGroupMajority) + require.NoError(t, err) + err = testEnvironment.Client.LabelChaosGroup(testEnvironment.Cfg.Namespace, "instance=node-", 2, 5, ChaosGroupMajorityPlus) + require.NoError(t, err) + + chainClient, err := blockchain.NewEVMClient(network, testEnvironment, l) + require.NoError(t, err, "Error connecting to blockchain") + contractDeployer, err := contracts.NewContractDeployer(chainClient, l) + require.NoError(t, err, "Error building contract deployer") + + chainlinkNodes, err := client.ConnectChainlinkNodes(testEnvironment) + require.NoError(t, err, "Error connecting to Chainlink nodes") + chainClient.ParallelTransactions(true) + + // Register cleanup for any test + t.Cleanup(func() { + if chainClient != nil { + chainClient.GasStats().PrintStats() + } + err := actions.TeardownSuite(t, testEnvironment, utils.ProjectRoot, chainlinkNodes, nil, zapcore.PanicLevel, chainClient) + require.NoError(t, err, "Error tearing down environment") + }) + + txCost, err := chainClient.EstimateCostForChainlinkOperations(1000) + require.NoError(t, err, "Error estimating cost for Chainlink Operations") + err = actions.FundChainlinkNodes(chainlinkNodes, chainClient, txCost) + require.NoError(t, err, "Error funding Chainlink nodes") + + linkToken, err := contractDeployer.DeployLinkTokenContract() + require.NoError(t, err, "Error deploying LINK token") + + registry, registrar := actions.DeployAutoOCRRegistryAndRegistrar( + t, + registryVersion, + defaultOCRRegistryConfig, + linkToken, + contractDeployer, + chainClient, + ) + + // Fund the registry with LINK + err = linkToken.Transfer(registry.Address(), big.NewInt(0).Mul(big.NewInt(1e18), big.NewInt(int64(numberOfUpkeeps)))) + require.NoError(t, err, "Funding keeper registry contract shouldn't fail") + + actions.CreateOCRKeeperJobs(t, chainlinkNodes, registry.Address(), network.ChainID, 0, registryVersion) + nodesWithoutBootstrap := chainlinkNodes[1:] + ocrConfig, err := actions.BuildAutoOCR2ConfigVars(t, nodesWithoutBootstrap, defaultOCRRegistryConfig, registrar.Address(), 30*time.Second) + require.NoError(t, err, "Error building OCR config vars") + err = registry.SetConfig(defaultOCRRegistryConfig, ocrConfig) + require.NoError(t, err, "Registry config should be be set successfully") + require.NoError(t, chainClient.WaitForEvents(), "Waiting for config to be set") + + consumers_conditional, upkeepIDs_conditional := actions.DeployConsumers(t, registry, registrar, linkToken, contractDeployer, chainClient, numberOfUpkeeps, big.NewInt(defaultLinkFunds), defaultUpkeepGasLimit, false) + consumers_logtrigger, upkeepIDs_logtrigger := actions.DeployConsumers(t, registry, registrar, linkToken, contractDeployer, chainClient, numberOfUpkeeps, big.NewInt(defaultLinkFunds), defaultUpkeepGasLimit, true) + + consumers := append(consumers_conditional, consumers_logtrigger...) + upkeepIDs := append(upkeepIDs_conditional, upkeepIDs_logtrigger...) + + l.Info().Msg("Waiting for all upkeeps to be performed") + + gom := gomega.NewGomegaWithT(t) + gom.Eventually(func(g gomega.Gomega) { + // Check if the upkeeps are performing multiple times by analyzing their counters and checking they are greater than 10 + for i := 0; i < len(upkeepIDs); i++ { + counter, err := consumers[i].Counter(context.Background()) + require.NoError(t, err, "Failed to retrieve consumer counter for upkeep at index %d", i) + expect := 5 + l.Info().Int64("Upkeeps Performed", counter.Int64()).Int("Upkeep ID", i).Msg("Number of upkeeps performed") + g.Expect(counter.Int64()).Should(gomega.BeNumerically(">=", int64(expect)), + "Expected consumer counter to be greater than %d, but got %d", expect, counter.Int64()) + } + }, "5m", "1s").Should(gomega.Succeed()) // ~1m for cluster setup, ~2m for performing each upkeep 5 times, ~2m buffer + + _, err = testEnvironment.Chaos.Run(testCase.chaosFunc(testEnvironment.Cfg.Namespace, testCase.chaosProps)) + require.NoError(t, err) + + gom.Eventually(func(g gomega.Gomega) { + // Check if the upkeeps are performing multiple times by analyzing their counters and checking they are greater than 10 + for i := 0; i < len(upkeepIDs); i++ { + counter, err := consumers[i].Counter(context.Background()) + require.NoError(t, err, "Failed to retrieve consumer counter for upkeep at index %d", i) + expect := 10 + l.Info().Int64("Upkeeps Performed", counter.Int64()).Int("Upkeep ID", i).Msg("Number of upkeeps performed") + g.Expect(counter.Int64()).Should(gomega.BeNumerically(">=", int64(expect)), + "Expected consumer counter to be greater than %d, but got %d", expect, counter.Int64()) + } + }, "3m", "1s").Should(gomega.Succeed()) // ~1m for cluster setup, ~2m for performing each upkeep 5 times, ~2m buffer + }) } - err = testEnvironment.Client.LabelChaosGroup(testEnvironment.Cfg.Namespace, "instance=node-", 1, 2, ChaosGroupMinority) - require.NoError(t, err) - err = testEnvironment.Client.LabelChaosGroup(testEnvironment.Cfg.Namespace, "instance=node-", 3, 5, ChaosGroupMajority) - require.NoError(t, err) - err = testEnvironment.Client.LabelChaosGroup(testEnvironment.Cfg.Namespace, "instance=node-", 2, 5, ChaosGroupMajorityPlus) - require.NoError(t, err) - - chainClient, err := blockchain.NewEVMClient(network, testEnvironment, l) - require.NoError(t, err, "Error connecting to blockchain") - contractDeployer, err := contracts.NewContractDeployer(chainClient, l) - require.NoError(t, err, "Error building contract deployer") - - chainlinkNodes, err := client.ConnectChainlinkNodes(testEnvironment) - require.NoError(t, err, "Error connecting to Chainlink nodes") - chainClient.ParallelTransactions(true) - - // Register cleanup for any test - t.Cleanup(func() { - if chainClient != nil { - chainClient.GasStats().PrintStats() - } - err := actions.TeardownSuite(t, testEnvironment, utils.ProjectRoot, chainlinkNodes, nil, zapcore.PanicLevel, chainClient) - require.NoError(t, err, "Error tearing down environment") - }) - - txCost, err := chainClient.EstimateCostForChainlinkOperations(1000) - require.NoError(t, err, "Error estimating cost for Chainlink Operations") - err = actions.FundChainlinkNodes(chainlinkNodes, chainClient, txCost) - require.NoError(t, err, "Error funding Chainlink nodes") - - linkToken, err := contractDeployer.DeployLinkTokenContract() - require.NoError(t, err, "Error deploying LINK token") - - registry, registrar := actions.DeployAutoOCRRegistryAndRegistrar( - t, - eth_contracts.RegistryVersion_2_0, - defaultOCRRegistryConfig, - linkToken, - contractDeployer, - chainClient, - ) - - // Fund the registry with LINK - err = linkToken.Transfer(registry.Address(), big.NewInt(0).Mul(big.NewInt(1e18), big.NewInt(int64(numberOfUpkeeps)))) - require.NoError(t, err, "Funding keeper registry contract shouldn't fail") - - actions.CreateOCRKeeperJobs(t, chainlinkNodes, registry.Address(), network.ChainID, 0, eth_contracts.RegistryVersion_2_0) - nodesWithoutBootstrap := chainlinkNodes[1:] - ocrConfig, err := actions.BuildAutoOCR2ConfigVars(t, nodesWithoutBootstrap, defaultOCRRegistryConfig, registrar.Address(), 30*time.Second) - require.NoError(t, err, "Error building OCR config vars") - err = registry.SetConfig(defaultOCRRegistryConfig, ocrConfig) - require.NoError(t, err, "Registry config should be be set successfully") - require.NoError(t, chainClient.WaitForEvents(), "Waiting for config to be set") - - consumers, upkeepIDs := actions.DeployConsumers(t, registry, registrar, linkToken, contractDeployer, chainClient, numberOfUpkeeps, big.NewInt(defaultLinkFunds), defaultUpkeepGasLimit, false) - - l.Info().Msg("Waiting for all upkeeps to be performed") - - gom := gomega.NewGomegaWithT(t) - gom.Eventually(func(g gomega.Gomega) { - // Check if the upkeeps are performing multiple times by analyzing their counters and checking they are greater than 10 - for i := 0; i < len(upkeepIDs); i++ { - counter, err := consumers[i].Counter(context.Background()) - require.NoError(t, err, "Failed to retrieve consumer counter for upkeep at index %d", i) - expect := 5 - l.Info().Int64("Upkeeps Performed", counter.Int64()).Int("Upkeep ID", i).Msg("Number of upkeeps performed") - g.Expect(counter.Int64()).Should(gomega.BeNumerically(">=", int64(expect)), - "Expected consumer counter to be greater than %d, but got %d", expect, counter.Int64()) - } - }, "5m", "1s").Should(gomega.Succeed()) // ~1m for cluster setup, ~2m for performing each upkeep 5 times, ~2m buffer - - _, err = testEnvironment.Chaos.Run(testCase.chaosFunc(testEnvironment.Cfg.Namespace, testCase.chaosProps)) - require.NoError(t, err) - - gom.Eventually(func(g gomega.Gomega) { - // Check if the upkeeps are performing multiple times by analyzing their counters and checking they are greater than 10 - for i := 0; i < len(upkeepIDs); i++ { - counter, err := consumers[i].Counter(context.Background()) - require.NoError(t, err, "Failed to retrieve consumer counter for upkeep at index %d", i) - expect := 10 - l.Info().Int64("Upkeeps Performed", counter.Int64()).Int("Upkeep ID", i).Msg("Number of upkeeps performed") - g.Expect(counter.Int64()).Should(gomega.BeNumerically(">=", int64(expect)), - "Expected consumer counter to be greater than %d, but got %d", expect, counter.Int64()) - } - }, "3m", "1s").Should(gomega.Succeed()) // ~1m for cluster setup, ~2m for performing each upkeep 5 times, ~2m buffer }) } }