Skip to content

Commit

Permalink
More long chaos tests (#230)
Browse files Browse the repository at this point in the history
  • Loading branch information
AnieeG authored Oct 23, 2023
1 parent a1fd68d commit 457e53e
Show file tree
Hide file tree
Showing 14 changed files with 764 additions and 838 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ccip-chaos-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ jobs:
- name: Run Load With Chaos Tests
uses: smartcontractkit/chainlink-github-actions/chainlink-testing-framework/run-tests@ad22fbd6f4d108b82aaf49b527bcf40f32babea8 # v2.2.1
with:
test_command_to_run: make test_need_operator_assets && cd ./integration-tests/ccip-tests && go test -timeout 1h -count=1 -json -test.parallel 4 -run '^TestLoadCCIPStableRequestTriggeringWithPodChaos$' ./load 2>&1 | tee /tmp/gotest.log | gotestfmt
test_command_to_run: make test_need_operator_assets && cd ./integration-tests/ccip-tests && go test -timeout 1h -count=1 -json -test.parallel 4 -run '^TestLoadCCIPStableWithPodChaosDiffCommitAndExec' ./load 2>&1 | tee /tmp/gotest.log | gotestfmt
test_download_vendor_packages_command: make gomod
cl_repo: ${{ secrets.QA_AWS_ACCOUNT_NUMBER }}.dkr.ecr.${{ secrets.QA_AWS_REGION }}.amazonaws.com/chainlink
cl_image_tag: ${{ github.sha }}
Expand Down
6 changes: 3 additions & 3 deletions integration-tests/ccip-tests/Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# example usage: make test_load_ccip_simulated_k8 image=chainlink-ccip tag=latest testimage=chainlink-ccip-tests:latest
# example usage: make test_load_ccip_simulated_k8 image=chainlink-ccip tag=latest testimage=chainlink-ccip-tests:latest testname=TestLoadCCIPStableRequestTriggeringWithNetworkChaos
.PHONY: test_load_ccip_simulated_k8
test_load_ccip_simulated_k8:
source ./load-test.env && \
CHAINLINK_IMAGE=$(image) \
CHAINLINK_VERSION=$(tag) \
ENV_JOB_IMAGE=$(testimage) \
go test -timeout 24h -count=1 -v -run ^TestLoadCCIPStableRequestTriggeringWithNetworkChaos$$ ./load
go test -timeout 24h -count=1 -v -run ^$(testname)$$ ./load


# example usage: make test_load_ccip_simulated_k8 image=chainlink-ccip tag=latest testimage=chainlink-ccip-tests:latest
Expand All @@ -28,7 +28,7 @@ test_smoke_ccip_simulated_local:
CCIP_DEPLOY_ON_LOCAL=True \
CCIP_NETWORK_PAIRS="" \
CCIP_NO_OF_NETWORKS=2 \
CCIP_NO_OF_LANES_PER_PAIR=1 \
CCIP_NO_OF_ROUTERS_PER_PAIR=1 \
CCIP_MSG_TYPE=WithToken \
CHAINLINK_IMAGE=$(image) \
CHAINLINK_VERSION=$(tag) \
Expand Down
40 changes: 27 additions & 13 deletions integration-tests/ccip-tests/actions/ccip_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,14 @@ const (
ChaosGroupCommitFaulty = "CommitMinority" // f number of nodes
ChaosGroupExecutionFaultyPlus = "ExecutionNodesMajority" // > f number of nodes
ChaosGroupExecutionFaulty = "ExecutionNodesMinority" // f number of nodes
ChaosGroupCCIPGeth = "CCIPGeth" // both source and destination simulated geth networks
ChaosGroupNetworkACCIPGeth = "CCIPNetworkAGeth"
ChaosGroupNetworkBCCIPGeth = "CCIPNetworkBGeth"
RootSnoozeTimeSimulated = 3 * time.Minute
InflightExpirySimulated = 3 * time.Minute

ChaosGroupCommitAndExecFaulty = "CommitAndExecutionNodesMinority" // f number of nodes
ChaosGroupCommitAndExecFaultyPlus = "CommitAndExecutionNodesMajority" // >f number of nodes
ChaosGroupCCIPGeth = "CCIPGeth" // both source and destination simulated geth networks
ChaosGroupNetworkACCIPGeth = "CCIPNetworkAGeth"
ChaosGroupNetworkBCCIPGeth = "CCIPNetworkBGeth"
RootSnoozeTimeSimulated = 3 * time.Minute
InflightExpirySimulated = 3 * time.Minute
// The higher the load/throughput, the higher the value we might need here to guarantee that nonces are not blocked.
// 1 day should be enough for most cases.
PermissionlessExecThreshold = 60 * 60 * 24 // 1 day
Expand Down Expand Up @@ -1852,10 +1855,8 @@ func (lane *CCIPLane) DeployNewCCIPLane(
var bootstrapExec *client.CLNodesWithKeys
var execNodes []*client.CLNodesWithKeys
commitNodes := clNodes[1:]
env.commitNodeStartIndex = 1
env.execNodeStartIndex = 1
env.numOfAllowedFaultyExec = 1
env.numOfAllowedFaultyCommit = 1
env.commitNodeStartIndex = 2
env.execNodeStartIndex = 2
env.numOfCommitNodes = numOfCommitNodes
env.numOfExecNodes = numOfCommitNodes
if !commitAndExecOnSameDON {
Expand All @@ -1865,13 +1866,16 @@ func (lane *CCIPLane) DeployNewCCIPLane(
bootstrapExec = clNodes[1] // for a set-up of different commit and execution nodes second node is the bootstrapper for execution nodes
commitNodes = clNodes[2 : 2+numOfCommitNodes]
execNodes = clNodes[2+numOfCommitNodes:]
env.commitNodeStartIndex = 2
env.execNodeStartIndex = 7
env.commitNodeStartIndex = 3
env.execNodeStartIndex = 3 + numOfCommitNodes
env.numOfCommitNodes = len(commitNodes)
env.numOfExecNodes = len(execNodes)
} else {
execNodes = commitNodes
}
env.numOfAllowedFaultyExec = (len(execNodes) - 1) / 3
env.numOfAllowedFaultyCommit = (len(commitNodes) - 1) / 3

// Save the current block numbers. If there is a delay between job start-up and OCR config set-up, the jobs will
// replay the log polling from these block numbers. The dest block number should ideally be the block number on which
// the contract config is set, and the source block number should be the one on which the ccip send request is performed.
Expand Down Expand Up @@ -2201,11 +2205,21 @@ func (c *CCIPTestEnv) ChaosLabelForAllGeth(t *testing.T, gethNetworksLabels []st
}

func (c *CCIPTestEnv) ChaosLabelForCLNodes(t *testing.T) {
allowedFaulty := c.numOfAllowedFaultyCommit
for i := c.commitNodeStartIndex; i < len(c.CLNodes); i++ {
labelSelector := map[string]string{
"app": "chainlink-0",
"instance": fmt.Sprintf("node-%d", i),
}
if i >= c.commitNodeStartIndex && i < c.commitNodeStartIndex+allowedFaulty+1 {
err := c.K8Env.Client.LabelChaosGroupByLabels(c.K8Env.Cfg.Namespace, labelSelector, ChaosGroupCommitAndExecFaultyPlus)
require.NoError(t, err)
}
if i >= c.commitNodeStartIndex && i < c.commitNodeStartIndex+allowedFaulty {
err := c.K8Env.Client.LabelChaosGroupByLabels(c.K8Env.Cfg.Namespace, labelSelector, ChaosGroupCommitAndExecFaulty)
require.NoError(t, err)
}

// commit nodes start at c.commitNodeStartIndex
if i >= c.commitNodeStartIndex && i < c.commitNodeStartIndex+c.numOfCommitNodes {
err := c.K8Env.Client.LabelChaosGroupByLabels(c.K8Env.Cfg.Namespace, labelSelector, ChaosGroupCommit)
Expand Down Expand Up @@ -2246,7 +2260,7 @@ func (c *CCIPTestEnv) SetUpNodesAndKeys(
if c.LocalCluster != nil {
// for local cluster, fetch the values from the local cluster
for _, chainlinkNode := range c.LocalCluster.ClCluster.Nodes {
chainlinkNodes = append(chainlinkNodes, chainlinkNode.API)
chainlinkNodes = append(chainlinkNodes, chainlinkNode.API.WithRetryCount(3))
c.nodeMutexes = append(c.nodeMutexes, &sync.Mutex{})
}
} else {
Expand All @@ -2261,7 +2275,7 @@ func (c *CCIPTestEnv) SetUpNodesAndKeys(
}

for _, chainlinkNode := range chainlinkK8sNodes {
chainlinkNodes = append(chainlinkNodes, chainlinkNode.ChainlinkClient)
chainlinkNodes = append(chainlinkNodes, chainlinkNode.ChainlinkClient.WithRetryCount(3))
c.nodeMutexes = append(c.nodeMutexes, &sync.Mutex{})
}
c.CLNodes = chainlinkK8sNodes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ func OffChainAggregatorV2ConfigWithNodes(numberNodes int, inflightExpiry time.Du
}
faultyNodes := 0
if numberNodes > 1 {
faultyNodes = numberNodes/3 - 1
faultyNodes = numberNodes / 3
}
if faultyNodes == 0 {
faultyNodes = 1
Expand Down
Loading

0 comments on commit 457e53e

Please sign in to comment.