Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rewrite some flaky zombienet polkadot tests to zombienet-sdk #6757

Open
wants to merge 39 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
4f898fc
add zombienet test for rfc 103
alindima Nov 12, 2024
89d8e97
Merge remote-tracking branch 'origin/master' into alindima/rfc-103-test
alindima Nov 12, 2024
a2028bf
override col_image
alindima Nov 13, 2024
0fcdcbb
add zombienet test for mixed receipt versions
alindima Nov 13, 2024
c81783f
fix yml
alindima Nov 13, 2024
d7fba75
try fixing col_image
alindima Nov 14, 2024
2e83300
remove resource reqs
alindima Nov 14, 2024
3a6b09e
fix
alindima Nov 14, 2024
037a632
relax
alindima Nov 20, 2024
dd3ede2
Merge remote-tracking branch 'origin/master' into alindima/rfc-103-test
alindima Nov 20, 2024
478fbb7
Merge remote-tracking branch 'origin/master' into alindima/rfc-103-test
alindima Nov 29, 2024
6f4dbb4
rewrite first test using zombienet-sdk
alindima Dec 2, 2024
610f29d
fixes
alindima Dec 2, 2024
7e1f0d4
add second test
alindima Dec 2, 2024
78eaf5e
do not re-init the logger
alindima Dec 2, 2024
184f206
prdoc
alindima Dec 2, 2024
01fbf6b
oops
alindima Dec 2, 2024
12c9f10
oops again
alindima Dec 2, 2024
069b7ba
fix prdoc
alindima Dec 2, 2024
385a139
Merge branch 'master' into alindima/rfc-103-test
alindima Dec 2, 2024
0a5f5ed
feedback
alindima Dec 4, 2024
b221b31
Merge branch 'master' into alindima/rfc-103-test
alindima Dec 4, 2024
060bd62
rewrite basic_3cores and doesnt_break_parachains tests
alindima Dec 4, 2024
8c10400
more work
alindima Dec 4, 2024
e47d0f1
Merge branch 'master' into alindima/rfc-103-test
alindima Dec 10, 2024
7b36433
relax assertion
alindima Dec 10, 2024
3036329
Merge remote-tracking branch 'origin/master' into alindima/zombienet-…
alindima Dec 10, 2024
d22d42e
move helpers to folder
alindima Dec 10, 2024
a03ce07
Merge remote-tracking branch 'origin/alindima/rfc-103-test' into alin…
alindima Dec 10, 2024
f626c82
rewrite async-backing-6-seconds-rate test
alindima Dec 10, 2024
5d5aed3
Merge remote-tracking branch 'origin/master' into alindima/zombienet-…
alindima Dec 18, 2024
6a12f33
Merge remote-tracking branch 'origin/master' into alindima/zombienet-…
alindima Dec 19, 2024
9ba1531
add more debug
pepoviola Dec 19, 2024
0ec8d89
add more cpu to runner
pepoviola Dec 19, 2024
e103ea2
Merge remote-tracking branch 'origin/alindima/zombienet-sdk-rewrite' …
alindima Dec 19, 2024
48b8105
address review comments
alindima Dec 19, 2024
ac2d4fe
Merge remote-tracking branch 'origin/master' into alindima/zombienet-…
alindima Dec 23, 2024
ba27e1a
remove redundant file
alindima Dec 23, 2024
4341161
sort deps
alindima Dec 23, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 59 additions & 41 deletions .gitlab/pipeline/zombienet/polkadot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -160,39 +160,6 @@ zombienet-polkadot-functional-0010-validator-disabling:
--local-dir="${LOCAL_DIR}/functional"
--test="0010-validator-disabling.zndsl"

# zndsl-based job for the async-backing 6-seconds-rate functional test.
# Extends `.zombienet-polkadot-common` and runs `0011-async-backing-6-seconds-rate.zndsl`
# from `${LOCAL_DIR}/functional` through the local-env-manager script.
.zombienet-polkadot-functional-0011-async-backing-6-seconds-rate:
extends:
- .zombienet-polkadot-common
script:
- /home/nonroot/zombie-net/scripts/ci/run-test-local-env-manager.sh
--local-dir="${LOCAL_DIR}/functional"
--test="0011-async-backing-6-seconds-rate.zndsl"

# zndsl-based job for the elastic scaling basic 3-cores test.
# Extends `.zombienet-polkadot-common` and runs `0001-basic-3cores-6s-blocks.zndsl`
# from `${LOCAL_DIR}/elastic_scaling` through the local-env-manager script.
zombienet-polkadot-elastic-scaling-0001-basic-3cores-6s-blocks:
extends:
- .zombienet-polkadot-common
variables:
# NOTE(review): presumably selects the runner infrastructure class for this job - confirm.
FORCED_INFRA_INSTANCE: "spot-iops"
before_script:
# Reuse the common before_script, then copy the assign-core.js helper next to the test files.
- !reference [ .zombienet-polkadot-common, before_script ]
- cp --remove-destination ${LOCAL_DIR}/assign-core.js ${LOCAL_DIR}/elastic_scaling
script:
- /home/nonroot/zombie-net/scripts/ci/run-test-local-env-manager.sh
--local-dir="${LOCAL_DIR}/elastic_scaling"
--test="0001-basic-3cores-6s-blocks.zndsl"

# zndsl-based job for the "elastic scaling doesnt break parachains" test.
# Extends `.zombienet-polkadot-common` and runs
# `0002-elastic-scaling-doesnt-break-parachains.zndsl` from `${LOCAL_DIR}/elastic_scaling`
# through the local-env-manager script.
.zombienet-polkadot-elastic-scaling-0002-elastic-scaling-doesnt-break-parachains:
extends:
- .zombienet-polkadot-common
before_script:
# Reuse the common before_script, then copy the assign-core.js helper next to the test files.
- !reference [ .zombienet-polkadot-common, before_script ]
- cp --remove-destination ${LOCAL_DIR}/assign-core.js ${LOCAL_DIR}/elastic_scaling
script:
- /home/nonroot/zombie-net/scripts/ci/run-test-local-env-manager.sh
--local-dir="${LOCAL_DIR}/elastic_scaling"
--test="0002-elastic-scaling-doesnt-break-parachains.zndsl"


.zombienet-polkadot-functional-0012-spam-statement-distribution-requests:
extends:
- .zombienet-polkadot-common
Expand Down Expand Up @@ -236,14 +203,6 @@ zombienet-polkadot-functional-0015-coretime-shared-core:
--local-dir="${LOCAL_DIR}/functional"
--test="0016-approval-voting-parallel.zndsl"

# zndsl-based job for the sync-backing functional test.
# Extends `.zombienet-polkadot-common` and runs `0017-sync-backing.zndsl`
# from `${LOCAL_DIR}/functional` through the local-env-manager script.
.zombienet-polkadot-functional-0017-sync-backing:
extends:
- .zombienet-polkadot-common
script:
- /home/nonroot/zombie-net/scripts/ci/run-test-local-env-manager.sh
--local-dir="${LOCAL_DIR}/functional"
--test="0017-sync-backing.zndsl"

zombienet-polkadot-functional-0018-shared-core-idle-parachain:
extends:
- .zombienet-polkadot-common
Expand Down Expand Up @@ -416,3 +375,62 @@ zombienet-polkadot-elastic-scaling-slot-based-3cores:
- unset NEXTEST_FAILURE_OUTPUT
- unset NEXTEST_SUCCESS_OUTPUT
- cargo nextest run --archive-file ./artifacts/polkadot-zombienet-tests.tar.zst --no-capture -- elastic_scaling::slot_based_3cores::slot_based_3cores_test

# zombienet-sdk job: runs the Rust test
# `elastic_scaling::doesnt_break_parachains::doesnt_break_parachains_test`
# with cargo nextest from the pre-built test archive.
zombienet-polkadot-elastic-scaling-doesnt-break-parachains:
extends:
- .zombienet-polkadot-common
# The nextest archive used in `script` comes from the artifacts of this build job.
needs:
- job: build-polkadot-zombienet-tests
artifacts: true
before_script:
- !reference [ ".zombienet-polkadot-common", "before_script" ]
script:
# we want to use `--no-capture` in zombienet tests.
- unset NEXTEST_FAILURE_OUTPUT
- unset NEXTEST_SUCCESS_OUTPUT
- cargo nextest run --archive-file ./artifacts/polkadot-zombienet-tests.tar.zst --no-capture -- elastic_scaling::doesnt_break_parachains::doesnt_break_parachains_test

# zombienet-sdk job: runs the Rust test `elastic_scaling::basic_3cores::basic_3cores_test`
# with cargo nextest from the pre-built test archive.
zombienet-polkadot-elastic-scaling-basic-3cores:
extends:
- .zombienet-polkadot-common
# The nextest archive used in `script` comes from the artifacts of this build job.
needs:
- job: build-polkadot-zombienet-tests
artifacts: true
before_script:
- !reference [ ".zombienet-polkadot-common", "before_script" ]
# Point CUMULUS_IMAGE at COL_IMAGE.
# NOTE(review): presumably because the test takes the adder-collator image from the
# cumulus image setting - confirm.
- export CUMULUS_IMAGE="${COL_IMAGE}"
script:
# we want to use `--no-capture` in zombienet tests.
- unset NEXTEST_FAILURE_OUTPUT
- unset NEXTEST_SUCCESS_OUTPUT
- cargo nextest run --archive-file ./artifacts/polkadot-zombienet-tests.tar.zst --no-capture -- elastic_scaling::basic_3cores::basic_3cores_test

# zombienet-sdk job: runs the Rust test `functional::sync_backing::sync_backing_test`
# with cargo nextest from the pre-built test archive.
zombienet-polkadot-functional-sync-backing:
extends:
- .zombienet-polkadot-common
# The nextest archive used in `script` comes from the artifacts of this build job.
needs:
- job: build-polkadot-zombienet-tests
artifacts: true
before_script:
- !reference [ ".zombienet-polkadot-common", "before_script" ]
# Hardcoded to an old polkadot-parachain image, pre async backing.
- export CUMULUS_IMAGE="docker.io/paritypr/polkadot-parachain-debug:master-99623e62"
script:
# we want to use `--no-capture` in zombienet tests.
- unset NEXTEST_FAILURE_OUTPUT
- unset NEXTEST_SUCCESS_OUTPUT
- cargo nextest run --archive-file ./artifacts/polkadot-zombienet-tests.tar.zst --no-capture -- functional::sync_backing::sync_backing_test

# zombienet-sdk job: runs the Rust test
# `functional::async_backing_6_seconds_rate::async_backing_6_seconds_rate_test`
# with cargo nextest from the pre-built test archive.
zombienet-polkadot-functional-async-backing-6-seconds-rate:
extends:
- .zombienet-polkadot-common
# The nextest archive used in `script` comes from the artifacts of this build job.
needs:
- job: build-polkadot-zombienet-tests
artifacts: true
before_script:
- !reference [ ".zombienet-polkadot-common", "before_script" ]
script:
# we want to use `--no-capture` in zombienet tests.
- unset NEXTEST_FAILURE_OUTPUT
- unset NEXTEST_SUCCESS_OUTPUT
- cargo nextest run --archive-file ./artifacts/polkadot-zombienet-tests.tar.zst --no-capture -- functional::async_backing_6_seconds_rate::async_backing_6_seconds_rate_test
132 changes: 132 additions & 0 deletions polkadot/zombienet-sdk-tests/tests/elastic_scaling/basic_3cores.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: Apache-2.0

// Test that a parachain that uses a basic collator (like adder-collator) with elastic scaling
// can achieve full throughput of 3 candidates per block.

use anyhow::anyhow;

use crate::helpers::{
assert_para_throughput, rococo,
rococo::runtime_types::{
pallet_broker::coretime_interface::CoreAssignment,
polkadot_runtime_parachains::assigner_coretime::PartsOf57600,
},
};
use serde_json::json;
use subxt::{OnlineClient, PolkadotConfig};
use subxt_signer::sr25519::dev;
use zombienet_sdk::NetworkConfigBuilder;

/// End-to-end test: a parachain served by a basic collator (adder-collator) that is given
/// extra cores is expected to back more candidates than a parachain left with a single core.
///
/// Steps, as performed below:
/// 1. Spawn a `rococo-local` relay chain with four validators and two adder-collator
///    parachains (ids 2000 and 2001).
/// 2. Submit, as Alice via sudo, a batch of two `assign_core` calls giving cores 0 and 1 to
///    para 2000.
/// 3. Check the backed-candidate counts of both paras with `assert_para_throughput`.
///
/// Any error from building the config, spawning the network, submitting the extrinsic or the
/// throughput assertion is propagated with `?` and fails the test.
#[tokio::test(flavor = "multi_thread")]
async fn basic_3cores_test() -> Result<(), anyhow::Error> {
// The result is deliberately discarded, so a failed logger initialisation never fails the test.
// NOTE(review): presumably this covers a logger that was already installed - confirm.
let _ = env_logger::try_init_from_env(
env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "info"),
);

// Image names for the relay chain and collator binaries are taken from the environment.
let images = zombienet_sdk::environment::get_images_from_env();

let config = NetworkConfigBuilder::new()
.with_relaychain(|r| {
let r = r
.with_chain("rococo-local")
.with_default_command("polkadot")
.with_default_image(images.polkadot.as_str())
.with_default_args(vec![("-lparachain=debug").into()])
// Genesis configuration: two cores with one validator per core, plus the async backing
// parameters listed here.
.with_genesis_overrides(json!({
"configuration": {
"config": {
"scheduler_params": {
"num_cores": 2,
"max_validators_per_core": 1
},
"async_backing_params": {
"max_candidate_depth": 6,
"allowed_ancestry_len": 2
}
}
}
}))
// Have to set a `with_node` outside of the loop below, so that `r` has the right
// type.
.with_node(|node| node.with_name("validator-0"));

// Add validator-1 through validator-3, for four validators in total.
(1..4).fold(r, |acc, i| acc.with_node(|node| node.with_name(&format!("validator-{i}"))))
})
.with_parachain(|p| {
// Para 2000: the parachain that is assigned cores 0 and 1 further down.
p.with_id(2000)
.with_default_command("adder-collator")
// The adder-collator is flagged as not cumulus-based.
.cumulus_based(false)
.with_default_image(images.cumulus.as_str())
.with_default_args(vec![("-lparachain=debug").into()])
.with_collator(|n| n.with_name("adder-2000"))
})
.with_parachain(|p| {
// Para 2001: same setup as para 2000, but no core assignment is submitted for it below.
p.with_id(2001)
.with_default_command("adder-collator")
.cumulus_based(false)
.with_default_image(images.cumulus.as_str())
.with_default_args(vec![("-lparachain=debug").into()])
.with_collator(|n| n.with_name("adder-2001"))
})
.build()
// Flatten every config error into one space-separated message.
.map_err(|e| {
let errs = e.into_iter().map(|e| e.to_string()).collect::<Vec<_>>().join(" ");
anyhow!("config errs: {errs}")
})?;

// Spawn the network with the spawn function returned by the zombienet-sdk environment helper.
let spawn_fn = zombienet_sdk::environment::get_spawn_fn();
let network = spawn_fn(config).await?;

let relay_node = network.get_node("validator-0")?;

// subxt client for validator-0; `wait_client` is awaited until it yields one.
let relay_client: OnlineClient<PolkadotConfig> = relay_node.wait_client().await?;
// Alice is the signer of the sudo call below.
let alice = dev::alice();

// Assign two extra cores to adder-2000.
// Both `assign_core` calls are wrapped in one `Utility::batch` dispatched through sudo.
relay_client
.tx()
.sign_and_submit_then_watch_default(
&rococo::tx()
.sudo()
.sudo(rococo::runtime_types::rococo_runtime::RuntimeCall::Utility(
rococo::runtime_types::pallet_utility::pallet::Call::batch {
calls: vec![
rococo::runtime_types::rococo_runtime::RuntimeCall::Coretime(
rococo::runtime_types::polkadot_runtime_parachains::coretime::pallet::Call::assign_core {
core: 0,
begin: 0,
// 57600 out of `PartsOf57600`: presumably the whole core goes to task 2000 - confirm.
assignment: vec![(CoreAssignment::Task(2000), PartsOf57600(57600))],
end_hint: None
}
),
rococo::runtime_types::rococo_runtime::RuntimeCall::Coretime(
rococo::runtime_types::polkadot_runtime_parachains::coretime::pallet::Call::assign_core {
core: 1,
begin: 0,
assignment: vec![(CoreAssignment::Task(2000), PartsOf57600(57600))],
end_hint: None
}
),
],
},
)),
&alice,
)
.await?
// Only continue once the extrinsic is finalized and reported as successful.
.wait_for_finalized_success()
.await?;

log::info!("2 more cores assigned to adder-2000");

// Arguments: relay client, number of finalized relay blocks to observe (15), and the accepted
// range of backed candidates per para id. NOTE(review): this reading follows the author reply
// in the review thread embedded below; the helper itself is not visible here.
assert_para_throughput(
&relay_client,
15,
[(2000, 40..46), (2001, 12..16)].into_iter().collect(),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These ranges don't make sense to me. 40 > 3 *12 .. are we too strict for the elastic scaling case or too lenient for the non-elastic scaling case?

Also note: I absolutely had to lookup the code of assert_para_throughput to understand what this is doing. Not a big deal, but e.g. using types like ParaId would have made it easier.

Copy link
Contributor Author

@alindima alindima Dec 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These ranges don't make sense to me. 40 > 3 *12 .. are we too strict for the elastic scaling case or too lenient for the non-elastic scaling case?

So these ranges mean that:

we are waiting until we reached 15 finalized blocks (after the first session change, and don't count blocks with session changes).
Then, check that the number of backed blocks for para 2000 is within the 40..46 range (ideally it should be 45, since it's 3*15, but in reality the performance in the CI is not as good).
And check that the number of backed blocks for para 2001 is within the 12..16 range (this para only has one assigned core, so it can get at most 15 blocks in, again allow some buffer here for less performant hardware)

Also note: I absolutely had to lookup the code of assert_para_throughput to understand what this is doing. Not a big deal, but e.g. using types like ParaId would have made it easier.

I can do that 👍🏻

)
.await?;

log::info!("Test finished successfully");

Ok(())
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: Apache-2.0

// Test that a para that doesn't use elastic scaling, but has acquired multiple cores, does not
// brick itself if the ElasticScalingMVP feature is enabled in genesis.

use anyhow::anyhow;

use crate::helpers::{
assert_finalized_block_height, assert_para_throughput, rococo,
rococo::runtime_types::{
pallet_broker::coretime_interface::CoreAssignment,
polkadot_runtime_parachains::assigner_coretime::PartsOf57600,
},
};
use serde_json::json;
use subxt::{OnlineClient, PolkadotConfig};
use subxt_signer::sr25519::dev;
use zombienet_sdk::NetworkConfigBuilder;

/// End-to-end test: a parachain run with `polkadot-parachain` (described below as not using
/// the slot-based collator) keeps making normal progress after a core is assigned to it.
///
/// Steps, as performed below:
/// 1. Spawn a `rococo-local` relay chain with four validators and one parachain (id 2000).
/// 2. Submit, as Alice via sudo, an `assign_core` call giving core 0 to para 2000.
/// 3. Check the para throughput with `assert_para_throughput` and the parachain finalized
///    block height with `assert_finalized_block_height`.
///
/// Any error from building the config, spawning the network, submitting the extrinsic or the
/// assertions is propagated with `?` and fails the test.
#[tokio::test(flavor = "multi_thread")]
async fn doesnt_break_parachains_test() -> Result<(), anyhow::Error> {
// The result is deliberately discarded, so a failed logger initialisation never fails the test.
// NOTE(review): presumably this covers a logger that was already installed - confirm.
let _ = env_logger::try_init_from_env(
env_logger::Env::default().filter_or(env_logger::DEFAULT_FILTER_ENV, "info"),
);

// Image names for the relay chain and collator binaries are taken from the environment.
let images = zombienet_sdk::environment::get_images_from_env();

let config = NetworkConfigBuilder::new()
.with_relaychain(|r| {
let r = r
.with_chain("rococo-local")
.with_default_command("polkadot")
.with_default_image(images.polkadot.as_str())
.with_default_args(vec![("-lparachain=debug").into()])
// Genesis configuration: one core with two validators per core, plus the async backing
// parameters listed here.
.with_genesis_overrides(json!({
"configuration": {
"config": {
"scheduler_params": {
"num_cores": 1,
"max_validators_per_core": 2
},
"async_backing_params": {
"max_candidate_depth": 6,
"allowed_ancestry_len": 2
}
}
}
}))
// Have to set a `with_node` outside of the loop below, so that `r` has the right
// type.
.with_node(|node| node.with_name("validator-0"));

// Add validator-1 through validator-3, for four validators in total.
(1..4).fold(r, |acc, i| acc.with_node(|node| node.with_name(&format!("validator-{i}"))))
})
.with_parachain(|p| {
// Use rococo-parachain default, which has 6 second slot time. Also, don't use
// slot-based collator.
p.with_id(2000)
.with_default_command("polkadot-parachain")
.with_default_image(images.cumulus.as_str())
.with_default_args(vec![("-lparachain=debug,aura=debug").into()])
.with_collator(|n| n.with_name("collator-2000"))
})
.build()
// Flatten every config error into one space-separated message.
.map_err(|e| {
let errs = e.into_iter().map(|e| e.to_string()).collect::<Vec<_>>().join(" ");
anyhow!("config errs: {errs}")
})?;

// Spawn the network with the spawn function returned by the zombienet-sdk environment helper.
let spawn_fn = zombienet_sdk::environment::get_spawn_fn();
let network = spawn_fn(config).await?;

let relay_node = network.get_node("validator-0")?;
let para_node = network.get_node("collator-2000")?;

// subxt client for validator-0; `wait_client` is awaited until it yields one.
let relay_client: OnlineClient<PolkadotConfig> = relay_node.wait_client().await?;
// Alice is the signer of the sudo call below.
let alice = dev::alice();

// Assign core 0 to para 2000 with a single sudo-dispatched `assign_core` call.
relay_client
.tx()
.sign_and_submit_then_watch_default(
&rococo::tx()
.sudo()
.sudo(rococo::runtime_types::rococo_runtime::RuntimeCall::Coretime(
rococo::runtime_types::polkadot_runtime_parachains::coretime::pallet::Call::assign_core {
core: 0,
begin: 0,
// 57600 out of `PartsOf57600`: presumably the whole core goes to task 2000 - confirm.
assignment: vec![(CoreAssignment::Task(2000), PartsOf57600(57600))],
end_hint: None
}
)),
&alice,
)
.await?
// Only continue once the extrinsic is finalized and reported as successful.
.wait_for_finalized_success()
.await?;

log::info!("1 more core assigned to the parachain");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the previous test we had the assurance that adding additional cores worked because of the throughput assertion. Here we don't have this. I would add a check that the para has indeed two cores (and is still working with the throughput of 1)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, makes sense

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


// Expect the parachain to be making normal progress, 1 candidate backed per relay chain block.
assert_para_throughput(&relay_client, 15, [(2000, 13..16)].into_iter().collect()).await?;

let para_client = para_node.wait_client().await?;
// Assert the parachain finalized block height is also on par with the number of backed
// candidates.
assert_finalized_block_height(&para_client, 12..16).await?;

log::info!("Test finished successfully");

Ok(())
}
6 changes: 2 additions & 4 deletions polkadot/zombienet-sdk-tests/tests/elastic_scaling/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: Apache-2.0

#[subxt::subxt(runtime_metadata_path = "metadata-files/rococo-local.scale")]
pub mod rococo {}

mod helpers;
mod basic_3cores;
mod doesnt_break_parachains;
mod slot_based_3cores;
Loading
Loading