Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix reshare timing issue #1211

Merged
merged 11 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions crates/shared/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,5 @@ pub const MAX_SIGNERS: u8 = 15;
/// Threshold for those signers
pub const SIGNER_THRESHOLD: u8 = 2;

/// For testing to line up chain mock data and reshare_test
pub const TEST_RESHARE_BLOCK_NUMBER: u32 = 10;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh i am happy to say goodbye to this one


/// Program version number, must be incremented if version number changes
pub const PROGRAM_VERSION_NUMBER: u8 = 0;
2 changes: 1 addition & 1 deletion crates/threshold-signature-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ axum ={ version="0.7.9", features=["ws"] }
subxt ="0.35.3"
parity-scale-codec="3.6.12"
sp-core ={ version="31.0.0", default-features=false }
sp-keyring ="34.0.0"

# Entropy
entropy-shared ={ version="0.3.0", path="../shared" }
Expand Down Expand Up @@ -79,7 +80,6 @@ configfs-tsm ={ version="0.0.1", optional=true }
serial_test ="3.2.0"
hex-literal ="0.4.1"
project-root="0.2.2"
sp-keyring ="34.0.0"
more-asserts="0.3.1"
lazy_static ="1.5.0"
blake3 ="1.5.5"
Expand Down
17 changes: 17 additions & 0 deletions crates/threshold-signature-server/src/helpers/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use crate::{
chain_api::{
entropy::{
self,
runtime_types::entropy_runtime::RuntimeCall,
runtime_types::pallet_staking_extension::pallet::{JumpStartStatus, ServerInfo},
},
EntropyConfig,
Expand All @@ -48,6 +49,7 @@ use entropy_protocol::PartyId;
#[cfg(test)]
use entropy_shared::EncodedVerifyingKey;
use entropy_shared::NETWORK_PARENT_KEY;
use sp_keyring::AccountKeyring;
use std::{fmt, net::SocketAddr, str, time::Duration};
use subxt::{
backend::legacy::LegacyRpcMethods, ext::sp_core::sr25519, tx::PairSigner,
Expand Down Expand Up @@ -370,3 +372,18 @@ pub fn get_port(server_info: &ServerInfo<SubxtAccountId32>) -> u32 {
str::from_utf8(&server_info.endpoint).unwrap().parse().unwrap();
socket_address.port().into()
}

/// Calls set storage from sudo for testing, allows use to manipulate chain storage.
pub async fn call_set_storage(
api: &OnlineClient<EntropyConfig>,
rpc: &LegacyRpcMethods<EntropyConfig>,
call: RuntimeCall,
) {
let set_storage = entropy::tx().sudo().sudo(call);
let alice = AccountKeyring::Alice;

let signature_request_pair_signer =
PairSigner::<EntropyConfig, sp_core::sr25519::Pair>::new(alice.into());

submit_transaction(api, rpc, &signature_request_pair_signer, &set_storage, None).await.unwrap();
}
2 changes: 1 addition & 1 deletion crates/threshold-signature-server/src/validator/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ pub async fn validate_new_reshare(
.await?
.ok_or_else(|| ValidatorErr::ChainFetch("Not Currently in a reshare"))?;

if chain_data.block_number != reshare_data.block_number.saturating_sub(1)
if chain_data.block_number != reshare_data.block_number
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The sub was removed as now it matches the running testnet

|| chain_data.new_signers != reshare_data.new_signers
{
return Err(ValidatorErr::InvalidData);
Expand Down
90 changes: 82 additions & 8 deletions crates/threshold-signature-server/src/validator/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ use crate::{
helpers::{
launch::{FORBIDDEN_KEYS, LATEST_BLOCK_NUMBER_RESHARE},
tests::{
get_port, initialize_test_logger, run_to_block, setup_client, spawn_testing_validators,
unsafe_get,
call_set_storage, get_port, initialize_test_logger, run_to_block, setup_client,
spawn_testing_validators, unsafe_get,
},
},
validator::{
Expand All @@ -29,8 +29,13 @@ use crate::{
use entropy_client::{self as test_client};
use entropy_client::{
chain_api::{
entropy, entropy::runtime_types::bounded_collections::bounded_vec::BoundedVec,
entropy::runtime_types::pallet_registry::pallet::ProgramInstance, get_api, get_rpc,
entropy,
entropy::runtime_types::bounded_collections::bounded_vec::BoundedVec,
entropy::runtime_types::entropy_runtime::RuntimeCall,
entropy::runtime_types::frame_system::pallet::Call as SystemsCall,
entropy::runtime_types::pallet_registry::pallet::ProgramInstance,
entropy::runtime_types::pallet_staking_extension::pallet::{NextSignerInfo, ReshareInfo},
get_api, get_rpc,
},
substrate::query_chain,
Hasher,
Expand Down Expand Up @@ -69,7 +74,8 @@ async fn test_reshare_basic() {
.await;
let api = get_api(&context[0].ws_url).await.unwrap();
let rpc = get_rpc(&context[0].ws_url).await.unwrap();

let alice_stash = AccountKeyring::AliceStash;
let dave_stash = AccountKeyring::DaveStash;
let client = reqwest::Client::new();

// Get current signers
Expand All @@ -78,7 +84,9 @@ async fn test_reshare_basic() {
let old_signer_ids: HashSet<[u8; 32]> =
HashSet::from_iter(signer_stash_accounts.clone().into_iter().map(|id| id.0));
let mut signers = Vec::new();
let mut next_signers = vec![];
for signer in signer_stash_accounts.iter() {
next_signers.push(signer);
let query = entropy::storage().staking_extension().threshold_servers(signer);
let server_info = query_chain(&api, &rpc, query, None).await.unwrap().unwrap();
signers.push(server_info);
Expand All @@ -89,6 +97,29 @@ async fn test_reshare_basic() {
let key_share = unsafe_get(&client, hex::encode(NETWORK_PARENT_KEY), port).await;
assert!(!key_share.is_empty());
}
next_signers.remove(0);
let binding = dave_stash.to_account_id().into();
next_signers.push(&binding);
let block_number = rpc.chain_get_header(None).await.unwrap().unwrap().number + 1;
let storage_address_next_signers = entropy::storage().staking_extension().next_signers();
let value_next_signers =
NextSignerInfo { confirmations: vec![], next_signers: next_signers.clone() };
// Add reshare
let call = RuntimeCall::System(SystemsCall::set_storage {
items: vec![(storage_address_next_signers.to_root_bytes(), value_next_signers.encode())],
});
call_set_storage(&api, &rpc, call).await;

let storage_address_reshare_data = entropy::storage().staking_extension().reshare_data();
let value_reshare_info =
ReshareInfo { block_number, new_signers: vec![dave_stash.public().encode()] };
// Add reshare
let call = RuntimeCall::System(SystemsCall::set_storage {
items: vec![(storage_address_reshare_data.to_root_bytes(), value_reshare_info.encode())],
});
call_set_storage(&api, &rpc, call).await;

let key_share_before = unsafe_get(&client, hex::encode(NETWORK_PARENT_KEY), 3002).await;

let mut i = 0;
// Wait up to 2min for reshare to complete: check once every second if we have a new set of signers.
Expand All @@ -101,14 +132,19 @@ async fn test_reshare_basic() {
if new_signer_ids != old_signer_ids {
break Ok(new_signer_ids);
}
if i > 120 {
if i > 240 {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting. Was this needed to get the tests to pass? Disappointingly we are still often getting this timeout error, and i kinda thought it could not be that the reshare takes more than two minutes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably not, I changed it when I was doing to reshares so it took more time, but I mean doesn't hurt

break Err("Timed out waiting for reshare");
}
i += 1;
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
}
.unwrap();

// wait for roatate keyshare
tokio::time::sleep(std::time::Duration::from_secs(10)).await;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah now i see what you mean about rotate keyshare happening after signers have changed. Yeah totally makes sense that you have to wait here a block before checking the keyshare has changed.


let key_share_after = unsafe_get(&client, hex::encode(NETWORK_PARENT_KEY), 3002).await;
assert_ne!(key_share_before, key_share_after);
// At this point the signing set has changed on-chain, but the keyshares haven't been rotated
// but by the time we have stored a program and registered, the rotation should have happened

Expand Down Expand Up @@ -173,6 +209,36 @@ async fn test_reshare_basic() {
let key_share = unsafe_get(&client, hex::encode(NETWORK_PARENT_KEY), port).await;
assert!(!key_share.is_empty());
}
let block_number = rpc.chain_get_header(None).await.unwrap().unwrap().number + 1;
let key_share_before_2 = unsafe_get(&client, hex::encode(NETWORK_PARENT_KEY), 3003).await;

next_signers.remove(0);
let binding = alice_stash.to_account_id().into();
next_signers.push(&binding);

let storage_address_next_signers = entropy::storage().staking_extension().next_signers();
let value_next_signers = NextSignerInfo { confirmations: vec![], next_signers };
// Add another reshare
let call = RuntimeCall::System(SystemsCall::set_storage {
items: vec![(storage_address_next_signers.to_root_bytes(), value_next_signers.encode())],
});
call_set_storage(&api, &rpc, call).await;

let storage_address_reshare_data = entropy::storage().staking_extension().reshare_data();
let value_reshare_info =
ReshareInfo { block_number, new_signers: vec![alice_stash.public().encode()] };
// Add another reshare
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment made me think you are doing a third reshare here - but this is part of the second reshare right? We need to modify next_signers and reshare_data.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup, I can try to find better comments

let call = RuntimeCall::System(SystemsCall::set_storage {
items: vec![(storage_address_reshare_data.to_root_bytes(), value_reshare_info.encode())],
});
call_set_storage(&api, &rpc, call).await;

// wait for roatate keyshare
tokio::time::sleep(std::time::Duration::from_secs(60)).await;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems a bit brittle - how come one minute here but above we have 4 mins plus 10 seconds for the key rotation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes on the second I am just checking that the rotate key share happend, got to 60 seconds from trial and error, the above one exists if finished early so I didn't try to make it shorter


let key_share_after_2 = unsafe_get(&client, hex::encode(NETWORK_PARENT_KEY), 3003).await;
assert_ne!(key_share_before_2, key_share_after_2);

clean_tests();
}

Expand Down Expand Up @@ -225,9 +291,17 @@ async fn test_reshare_validation_fail() {
validate_new_reshare(&api, &rpc, &ocw_message, &kv).await.map_err(|e| e.to_string());
assert_eq!(err_stale_data, Err("Data is stale".to_string()));

let block_number = rpc.chain_get_header(None).await.unwrap().unwrap().number + 1;
let block_number = rpc.chain_get_header(None).await.unwrap().unwrap().number;
let storage_address_reshare_data = entropy::storage().staking_extension().reshare_data();
let value_reshare_info =
ReshareInfo { block_number: block_number + 1, new_signers: vec![dave.public().encode()] };
// Add reshare
let call = RuntimeCall::System(SystemsCall::set_storage {
items: vec![(storage_address_reshare_data.to_root_bytes(), value_reshare_info.encode())],
});

ocw_message.block_number = block_number;
run_to_block(&rpc, block_number + 1).await;
call_set_storage(&api, &rpc, call).await;

let err_incorrect_data =
validate_new_reshare(&api, &rpc, &ocw_message, &kv).await.map_err(|e| e.to_string());
Expand Down
1 change: 0 additions & 1 deletion node/cli/src/chain_spec/dev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,6 @@ pub fn development_genesis_config(
})
.collect::<Vec<_>>(),
proactive_refresh_data: (vec![], vec![]),
mock_signer_rotate: (false, vec![], vec![]),
jump_started_signers: None,
},
"elections": ElectionsConfig {
Expand Down
6 changes: 0 additions & 6 deletions node/cli/src/chain_spec/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,6 @@ pub fn integration_tests_config(jumpstarted: bool) -> ChainSpec {
],
vec![],
get_account_id_from_seed::<sr25519::Public>("Alice"),
vec![
get_account_id_from_seed::<sr25519::Public>("Bob//stash"),
get_account_id_from_seed::<sr25519::Public>("Charlie//stash"),
],
jump_started_signers,
))
.build()
Expand All @@ -87,7 +83,6 @@ pub fn integration_tests_genesis_config(
)>,
initial_nominators: Vec<AccountId>,
root_key: AccountId,
mock_signer_rotate_data: Vec<AccountId>,
jump_started_signers: Option<Vec<AccountId>>,
) -> serde_json::Value {
// Note that any endowed_accounts added here will be included in the `elections` and
Expand Down Expand Up @@ -229,7 +224,6 @@ pub fn integration_tests_genesis_config(
],
vec![PREGENERATED_NETWORK_VERIFYING_KEY.to_vec(), DAVE_VERIFYING_KEY.to_vec()],
),
mock_signer_rotate: (true, mock_signer_rotate_data, vec![get_account_id_from_seed::<sr25519::Public>("Dave//stash")]),
jump_started_signers,
},
"elections": ElectionsConfig {
Expand Down
1 change: 0 additions & 1 deletion node/cli/src/chain_spec/testnet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,6 @@ pub fn testnet_genesis_config(
})
.collect::<Vec<_>>(),
proactive_refresh_data: (vec![], vec![]),
mock_signer_rotate: (false, vec![], vec![]),
jump_started_signers: None,
},
"elections": ElectionsConfig {
Expand Down
1 change: 0 additions & 1 deletion pallets/attestation/src/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,6 @@ pub fn new_test_ext() -> sp_io::TestExternalities {
(5, (0, NULL_ARR, vec![20], pck_encoded.to_vec().try_into().unwrap())),
],
proactive_refresh_data: (vec![], vec![]),
mock_signer_rotate: (false, vec![], vec![]),
jump_started_signers: None,
};
pallet_staking_extension.assimilate_storage(&mut t).unwrap();
Expand Down
4 changes: 2 additions & 2 deletions pallets/propagation/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,11 +160,11 @@ pub mod pallet {
/// Submits a request to do a key refresh on the signers parent key.
pub fn post_reshare(block_number: BlockNumberFor<T>) -> Result<(), http::Error> {
let reshare_data = pallet_staking_extension::Pallet::<T>::reshare_data();
if reshare_data.block_number != block_number {
if reshare_data.block_number + sp_runtime::traits::One::one() != block_number {
return Ok(());
}

let deadline = sp_io::offchain::timestamp().add(Duration::from_millis(2_000));
let deadline = sp_io::offchain::timestamp().add(Duration::from_millis(10_000));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No harm in leaving this here i guess, but in theory it should not be needed outside of the hot fix because resharing happens in a separate task

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

true, gonna leave it tho cuz why not

let kind = sp_core::offchain::StorageKind::PERSISTENT;
let from_local = sp_io::offchain::local_storage_get(kind, b"reshare_validators")
.unwrap_or_else(|| b"http://localhost:3001/validator/reshare".to_vec());
Expand Down
1 change: 0 additions & 1 deletion pallets/propagation/src/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,6 @@ pub fn new_test_ext() -> sp_io::TestExternalities {
(2, (4, NULL_ARR, vec![11], BoundedVec::with_max_capacity())),
],
proactive_refresh_data: (vec![], vec![]),
mock_signer_rotate: (false, vec![], vec![]),
jump_started_signers: None,
};

Expand Down
2 changes: 1 addition & 1 deletion pallets/propagation/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ fn knows_how_to_mock_several_http_calls() {
// doesn't trigger no reshare block
Propagation::post_reshare(7).unwrap();
pallet_staking_extension::ReshareData::<Test>::put(ReshareInfo {
block_number: 7,
block_number: 6,
new_signers: vec![1u64.encode()],
});
// now triggers
Expand Down
1 change: 0 additions & 1 deletion pallets/registry/src/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,6 @@ pub fn new_test_ext() -> sp_io::TestExternalities {
(7, (4, NULL_ARR, vec![50], BoundedVec::with_max_capacity())),
],
proactive_refresh_data: (vec![], vec![]),
mock_signer_rotate: (false, vec![], vec![]),
jump_started_signers: None,
};

Expand Down
22 changes: 2 additions & 20 deletions pallets/staking/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ use sp_staking::SessionIndex;
pub mod pallet {
use entropy_shared::{
QuoteContext, ValidatorInfo, X25519PublicKey, MAX_SIGNERS,
PREGENERATED_NETWORK_VERIFYING_KEY, TEST_RESHARE_BLOCK_NUMBER, VERIFICATION_KEY_LENGTH,
PREGENERATED_NETWORK_VERIFYING_KEY, VERIFICATION_KEY_LENGTH,
};
use frame_support::{
dispatch::{DispatchResult, DispatchResultWithPostInfo},
Expand Down Expand Up @@ -262,8 +262,6 @@ pub mod pallet {
pub threshold_servers: Vec<ThresholdServersConfig<T>>,
/// validator info and accounts to take part in proactive refresh
pub proactive_refresh_data: (Vec<ValidatorInfo>, Vec<Vec<u8>>),
/// validator info and account new signer to take part in a reshare
pub mock_signer_rotate: (bool, Vec<T::ValidatorId>, Vec<T::ValidatorId>),
/// Whether to begin in an already jumpstarted state in order to be able to test signing
/// using pre-generated keyshares
pub jump_started_signers: Option<Vec<T::ValidatorId>>,
Expand Down Expand Up @@ -295,22 +293,6 @@ pub mod pallet {
proactive_refresh_keys: self.proactive_refresh_data.1.clone(),
};
ProactiveRefresh::<T>::put(refresh_info);
// mocks a signer rotation for tss new_reshare tests
if self.mock_signer_rotate.0 {
let next_signers = &mut self.mock_signer_rotate.1.clone();
let mut new_signers = vec![];
for new_signer in self.mock_signer_rotate.2.clone() {
next_signers.push(new_signer.clone());
new_signers.push(new_signer.encode())
}
let next_signers = next_signers.to_vec();
NextSigners::<T>::put(NextSignerInfo { next_signers, confirmations: vec![] });
ReshareData::<T>::put(ReshareInfo {
// To give enough time for test_reshare setup
block_number: TEST_RESHARE_BLOCK_NUMBER.into(),
new_signers,
})
}

if let Some(jump_started_signers) = &self.jump_started_signers {
Signers::<T>::put(jump_started_signers.clone());
Expand Down Expand Up @@ -876,7 +858,7 @@ pub mod pallet {
// trigger reshare at next block
let current_block_number = <frame_system::Pallet<T>>::block_number();
let reshare_info = ReshareInfo {
block_number: current_block_number + sp_runtime::traits::One::one(),
block_number: current_block_number - sp_runtime::traits::One::one(),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems kinda weird that we subtract one here and then add one again in the propagation pallet. But since it works, definitely don't touch it.

new_signers,
};

Expand Down
1 change: 0 additions & 1 deletion pallets/staking/src/mock.rs
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,6 @@ pub fn new_test_ext() -> sp_io::TestExternalities {
(6, (8, NULL_ARR, vec![40], BoundedVec::with_max_capacity())),
],
proactive_refresh_data: (vec![], vec![]),
mock_signer_rotate: (false, vec![], vec![]),
jump_started_signers: None,
};
pallet_balances.assimilate_storage(&mut t).unwrap();
Expand Down
12 changes: 1 addition & 11 deletions pallets/staking/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -555,11 +555,7 @@ fn it_tests_new_session_handler() {
assert_ok!(Staking::new_session_handler(&[1, 5, 6]));
// takes signers original (5,6) pops off one and adds in new validator
assert_eq!(Staking::next_signers().unwrap().next_signers, vec![6, 1]);
assert_eq!(
Staking::reshare_data().block_number,
101,
"Check reshare block start at 100 + 1"
);
assert_eq!(Staking::reshare_data().block_number, 99, "Check reshare block start at 99 + 1");
assert_eq!(
Staking::reshare_data().new_signers,
vec![1u64.encode()],
Expand All @@ -571,12 +567,6 @@ fn it_tests_new_session_handler() {
"parent key threhsold updated"
);

assert_eq!(
Staking::reshare_data().block_number,
101,
"Check reshare block start at 100 + 1"
);

assert_ok!(Staking::new_session_handler(&[6, 5, 3]));
// takes 3 and leaves 5 and 6 since already in signer group
assert_eq!(Staking::next_signers().unwrap().next_signers, vec![6, 3]);
Expand Down
Loading