From 54bc3f053dea89e68ffabbaf50c8f898d6242aa9 Mon Sep 17 00:00:00 2001 From: Lu Zhang <8418040+longbowlu@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:32:24 -0700 Subject: [PATCH] [bridge] add watchdog to bridge node (#19878) ## Description 1. run watchdogs on bridge nodes. 2. merges `sui-bridge-watchdog` to `sui-bridge` crate, so there are no circular dependencies ## Test plan unit tests, will deploy locally and test --- ## Release notes Check each box that your changes affect. If none of the boxes relate to your changes, release notes aren't required. For each box you select, include information after the relevant heading that describes the impact of your changes that a user might notice and any actions they must take to implement updates. - [ ] Protocol: - [ ] Nodes (Validators and Full nodes): - [ ] Indexer: - [ ] JSON-RPC: - [ ] GraphQL: - [ ] CLI: - [ ] Rust SDK: - [ ] REST API: --- Cargo.lock | 17 ---- Cargo.toml | 2 - crates/sui-bridge-indexer/Cargo.toml | 1 - crates/sui-bridge-indexer/src/main.rs | 23 +++-- crates/sui-bridge-watchdog/Cargo.toml | 18 ---- .../{src => }/eth_bridge_status.rs | 0 .../{src => }/eth_vault_balance.rs | 0 crates/sui-bridge-watchdog/{src => }/lib.rs | 0 .../sui-bridge-watchdog/{src => }/metrics.rs | 0 .../{src => }/sui_bridge_status.rs | 0 crates/sui-bridge/src/config.rs | 13 +++ crates/sui-bridge/src/e2e_tests/test_utils.rs | 1 + crates/sui-bridge/src/eth_client.rs | 4 + crates/sui-bridge/src/lib.rs | 1 + crates/sui-bridge/src/node.rs | 92 +++++++++++++++++-- .../sui_bridge_watchdog/eth_bridge_status.rs | 58 ++++++++++++ .../sui_bridge_watchdog/eth_vault_balance.rs | 75 +++++++++++++++ .../src/sui_bridge_watchdog/metrics.rs | 52 +++++++++++ .../sui-bridge/src/sui_bridge_watchdog/mod.rs | 62 +++++++++++++ .../sui_bridge_watchdog/sui_bridge_status.rs | 48 ++++++++++ .../src/sui_bridge_watchdog/total_supplies.rs | 65 +++++++++++++ crates/sui-bridge/src/utils.rs | 9 +- crates/sui-e2e-tests/Cargo.toml | 1 - 23 files changed, 483
insertions(+), 59 deletions(-) delete mode 100644 crates/sui-bridge-watchdog/Cargo.toml rename crates/sui-bridge-watchdog/{src => }/eth_bridge_status.rs (100%) rename crates/sui-bridge-watchdog/{src => }/eth_vault_balance.rs (100%) rename crates/sui-bridge-watchdog/{src => }/lib.rs (100%) rename crates/sui-bridge-watchdog/{src => }/metrics.rs (100%) rename crates/sui-bridge-watchdog/{src => }/sui_bridge_status.rs (100%) create mode 100644 crates/sui-bridge/src/sui_bridge_watchdog/eth_bridge_status.rs create mode 100644 crates/sui-bridge/src/sui_bridge_watchdog/eth_vault_balance.rs create mode 100644 crates/sui-bridge/src/sui_bridge_watchdog/metrics.rs create mode 100644 crates/sui-bridge/src/sui_bridge_watchdog/mod.rs create mode 100644 crates/sui-bridge/src/sui_bridge_watchdog/sui_bridge_status.rs create mode 100644 crates/sui-bridge/src/sui_bridge_watchdog/total_supplies.rs diff --git a/Cargo.lock b/Cargo.lock index fefff1f53a1c9..286d572bf63af 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13122,7 +13122,6 @@ dependencies = [ "serde_yaml 0.8.26", "strum_macros 0.24.3", "sui-bridge", - "sui-bridge-watchdog", "sui-config", "sui-data-ingestion-core", "sui-indexer-builder", @@ -13138,21 +13137,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "sui-bridge-watchdog" -version = "0.1.0" -dependencies = [ - "anyhow", - "async-trait", - "ethers", - "futures", - "mysten-metrics", - "prometheus", - "sui-bridge", - "tokio", - "tracing", -] - [[package]] name = "sui-cluster-test" version = "1.37.0" @@ -13473,7 +13457,6 @@ dependencies = [ "serde_json", "shared-crypto", "sui", - "sui-bridge", "sui-config", "sui-core", "sui-framework", diff --git a/Cargo.toml b/Cargo.toml index bffc8f72aba8a..f2be612798fd2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -93,7 +93,6 @@ members = [ "crates/sui-bridge", "crates/sui-bridge-cli", "crates/sui-bridge-indexer", - "crates/sui-bridge-watchdog", "crates/sui-cluster-test", "crates/sui-config", "crates/sui-core", @@ -619,7 +618,6 @@ 
sui-archival = { path = "crates/sui-archival" } sui-authority-aggregation = { path = "crates/sui-authority-aggregation" } sui-benchmark = { path = "crates/sui-benchmark" } sui-bridge = { path = "crates/sui-bridge" } -sui-bridge-watchdog = { path = "crates/sui-bridge-watchdog" } sui-cluster-test = { path = "crates/sui-cluster-test" } sui-config = { path = "crates/sui-config" } sui-core = { path = "crates/sui-core" } diff --git a/crates/sui-bridge-indexer/Cargo.toml b/crates/sui-bridge-indexer/Cargo.toml index 4e41c2d6e0100..845f7beca3cdd 100644 --- a/crates/sui-bridge-indexer/Cargo.toml +++ b/crates/sui-bridge-indexer/Cargo.toml @@ -36,7 +36,6 @@ backoff.workspace = true sui-config.workspace = true tempfile.workspace = true sui-indexer-builder.workspace = true -sui-bridge-watchdog.workspace = true [dev-dependencies] sui-types = { workspace = true, features = ["test-utils"] } diff --git a/crates/sui-bridge-indexer/src/main.rs b/crates/sui-bridge-indexer/src/main.rs index 8ba1d128ce1d4..102d87ca91461 100644 --- a/crates/sui-bridge-indexer/src/main.rs +++ b/crates/sui-bridge-indexer/src/main.rs @@ -15,6 +15,7 @@ use std::str::FromStr; use std::sync::Arc; use sui_bridge::eth_client::EthClient; use sui_bridge::metered_eth_provider::{new_metered_eth_provider, MeteredEthHttpProvier}; +use sui_bridge::sui_bridge_watchdog::Observable; use sui_bridge::sui_client::SuiBridgeClient; use sui_bridge::utils::get_eth_contract_addresses; use sui_bridge_indexer::eth_bridge_indexer::EthFinalizedSyncDatasource; @@ -28,6 +29,10 @@ use mysten_metrics::spawn_logged_monitored_task; use mysten_metrics::start_prometheus_server; use sui_bridge::metrics::BridgeMetrics; +use sui_bridge::sui_bridge_watchdog::{ + eth_bridge_status::EthBridgeStatus, eth_vault_balance::EthVaultBalance, + metrics::WatchdogMetrics, sui_bridge_status::SuiBridgeStatus, BridgeWatchDog, +}; use sui_bridge_indexer::config::IndexerConfig; use sui_bridge_indexer::eth_bridge_indexer::EthDataMapper; use 
sui_bridge_indexer::metrics::BridgeIndexerMetrics; @@ -37,10 +42,6 @@ use sui_bridge_indexer::sui_bridge_indexer::SuiBridgeDataMapper; use sui_bridge_indexer::sui_datasource::SuiCheckpointDatasource; use sui_bridge_indexer::sui_transaction_handler::handle_sui_transactions_loop; use sui_bridge_indexer::sui_transaction_queries::start_sui_tx_polling_task; -use sui_bridge_watchdog::{ - eth_bridge_status::EthBridgeStatus, eth_vault_balance::EthVaultBalance, - metrics::WatchdogMetrics, sui_bridge_status::SuiBridgeStatus, BridgeWatchDog, -}; use sui_data_ingestion_core::DataIngestionMetrics; use sui_indexer_builder::indexer_builder::{BackfillStrategy, IndexerBuilder}; use sui_indexer_builder::progress::{ @@ -247,14 +248,12 @@ async fn start_watchdog( let sui_bridge_status = SuiBridgeStatus::new(sui_client, watchdog_metrics.sui_bridge_paused.clone()); - - BridgeWatchDog::new(vec![ - Arc::new(eth_vault_balance), - Arc::new(eth_bridge_status), - Arc::new(sui_bridge_status), - ]) - .run() - .await; + let observables: Vec> = vec![ + Box::new(eth_vault_balance), + Box::new(eth_bridge_status), + Box::new(sui_bridge_status), + ]; + BridgeWatchDog::new(observables).run().await; Ok(()) } diff --git a/crates/sui-bridge-watchdog/Cargo.toml b/crates/sui-bridge-watchdog/Cargo.toml deleted file mode 100644 index b6148e6bd6222..0000000000000 --- a/crates/sui-bridge-watchdog/Cargo.toml +++ /dev/null @@ -1,18 +0,0 @@ -[package] -name = "sui-bridge-watchdog" -version = "0.1.0" -authors = ["Mysten Labs "] -license = "Apache-2.0" -publish = false -edition = "2021" - -[dependencies] -sui-bridge.workspace = true -mysten-metrics.workspace = true -prometheus.workspace = true -anyhow.workspace = true -futures.workspace = true -async-trait.workspace = true -ethers = { version = "2.0" } -tracing.workspace = true -tokio = { workspace = true, features = ["full"] } diff --git a/crates/sui-bridge-watchdog/src/eth_bridge_status.rs b/crates/sui-bridge-watchdog/eth_bridge_status.rs similarity index 100% 
rename from crates/sui-bridge-watchdog/src/eth_bridge_status.rs rename to crates/sui-bridge-watchdog/eth_bridge_status.rs diff --git a/crates/sui-bridge-watchdog/src/eth_vault_balance.rs b/crates/sui-bridge-watchdog/eth_vault_balance.rs similarity index 100% rename from crates/sui-bridge-watchdog/src/eth_vault_balance.rs rename to crates/sui-bridge-watchdog/eth_vault_balance.rs diff --git a/crates/sui-bridge-watchdog/src/lib.rs b/crates/sui-bridge-watchdog/lib.rs similarity index 100% rename from crates/sui-bridge-watchdog/src/lib.rs rename to crates/sui-bridge-watchdog/lib.rs diff --git a/crates/sui-bridge-watchdog/src/metrics.rs b/crates/sui-bridge-watchdog/metrics.rs similarity index 100% rename from crates/sui-bridge-watchdog/src/metrics.rs rename to crates/sui-bridge-watchdog/metrics.rs diff --git a/crates/sui-bridge-watchdog/src/sui_bridge_status.rs b/crates/sui-bridge-watchdog/sui_bridge_status.rs similarity index 100% rename from crates/sui-bridge-watchdog/src/sui_bridge_status.rs rename to crates/sui-bridge-watchdog/sui_bridge_status.rs diff --git a/crates/sui-bridge/src/config.rs b/crates/sui-bridge/src/config.rs index e59576417caac..12464b171c621 100644 --- a/crates/sui-bridge/src/config.rs +++ b/crates/sui-bridge/src/config.rs @@ -17,6 +17,7 @@ use ethers::types::Address as EthAddress; use futures::{future, StreamExt}; use serde::{Deserialize, Serialize}; use serde_with::serde_as; +use std::collections::BTreeMap; use std::collections::HashSet; use std::path::PathBuf; use std::str::FromStr; @@ -119,6 +120,9 @@ pub struct BridgeNodeConfig { pub metrics_key_pair: NetworkKeyPair, #[serde(skip_serializing_if = "Option::is_none")] pub metrics: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub watchdog_config: Option, } pub fn default_ed25519_key_pair() -> NetworkKeyPair { @@ -133,6 +137,13 @@ pub struct MetricsConfig { pub push_url: String, } +#[derive(Debug, Clone, Deserialize, Serialize)] +#[serde(rename_all = "kebab-case")] +pub struct 
WatchdogConfig { + /// Total supplies to watch on Sui. Mapping from coin name to coin type tag + pub total_supplies: BTreeMap, +} + impl Config for BridgeNodeConfig {} impl BridgeNodeConfig { @@ -197,6 +208,7 @@ impl BridgeNodeConfig { let bridge_server_config = BridgeServerConfig { key: bridge_authority_key, metrics_port: self.metrics_port, + eth_bridge_proxy_address: eth_contracts[0], // the first contract is bridge proxy server_listen_port: self.server_listen_port, sui_client: sui_client.clone(), eth_client: eth_client.clone(), @@ -385,6 +397,7 @@ impl BridgeNodeConfig { pub struct BridgeServerConfig { pub key: BridgeAuthorityKeyPair, pub server_listen_port: u16, + pub eth_bridge_proxy_address: EthAddress, pub metrics_port: u16, pub sui_client: Arc>, pub eth_client: Arc>, diff --git a/crates/sui-bridge/src/e2e_tests/test_utils.rs b/crates/sui-bridge/src/e2e_tests/test_utils.rs index 187f28d5eb9b0..ef764b4df700e 100644 --- a/crates/sui-bridge/src/e2e_tests/test_utils.rs +++ b/crates/sui-bridge/src/e2e_tests/test_utils.rs @@ -804,6 +804,7 @@ pub(crate) async fn start_bridge_cluster( }, metrics_key_pair: default_ed25519_key_pair(), metrics: None, + watchdog_config: None, }; // Spawn bridge node in memory handles.push( diff --git a/crates/sui-bridge/src/eth_client.rs b/crates/sui-bridge/src/eth_client.rs index 30f8e6d92e536..215ea5428b01b 100644 --- a/crates/sui-bridge/src/eth_client.rs +++ b/crates/sui-bridge/src/eth_client.rs @@ -36,6 +36,10 @@ impl EthClient { self_.describe().await?; Ok(self_) } + + pub fn provider(&self) -> Arc> { + Arc::new(self.provider.clone()) + } } #[cfg(test)] diff --git a/crates/sui-bridge/src/lib.rs b/crates/sui-bridge/src/lib.rs index 0a138372c50fe..6268f9e404b52 100644 --- a/crates/sui-bridge/src/lib.rs +++ b/crates/sui-bridge/src/lib.rs @@ -19,6 +19,7 @@ pub mod node; pub mod orchestrator; pub mod server; pub mod storage; +pub mod sui_bridge_watchdog; pub mod sui_client; pub mod sui_syncer; pub mod sui_transaction_builder; diff --git 
a/crates/sui-bridge/src/node.rs b/crates/sui-bridge/src/node.rs index 4c22cdab2eb2a..671f2b358f3c0 100644 --- a/crates/sui-bridge/src/node.rs +++ b/crates/sui-bridge/src/node.rs @@ -1,9 +1,20 @@ // Copyright (c) Mysten Labs, Inc. // SPDX-License-Identifier: Apache-2.0 +use crate::config::WatchdogConfig; use crate::crypto::BridgeAuthorityPublicKeyBytes; +use crate::metered_eth_provider::MeteredEthHttpProvier; +use crate::sui_bridge_watchdog::eth_bridge_status::EthBridgeStatus; +use crate::sui_bridge_watchdog::eth_vault_balance::EthVaultBalance; +use crate::sui_bridge_watchdog::metrics::WatchdogMetrics; +use crate::sui_bridge_watchdog::sui_bridge_status::SuiBridgeStatus; +use crate::sui_bridge_watchdog::total_supplies::TotalSupplies; +use crate::sui_bridge_watchdog::{BridgeWatchDog, Observable}; +use crate::sui_client::SuiBridgeClient; use crate::types::BridgeCommittee; -use crate::utils::{get_committee_voting_power_by_name, get_validator_names_by_pub_keys}; +use crate::utils::{ + get_committee_voting_power_by_name, get_eth_contract_addresses, get_validator_names_by_pub_keys, +}; use crate::{ action_executor::BridgeActionExecutor, client::bridge_authority_aggregator::BridgeAuthorityAggregator, @@ -18,6 +29,7 @@ use crate::{ sui_syncer::SuiSyncer, }; use arc_swap::ArcSwap; +use ethers::providers::Provider; use ethers::types::Address as EthAddress; use mysten_metrics::spawn_logged_monitored_task; use std::collections::BTreeMap; @@ -45,6 +57,7 @@ pub async fn run_bridge_node( ) -> anyhow::Result> { init_all_struct_tags(); let metrics = Arc::new(BridgeMetrics::new(&prometheus_registry)); + let watchdog_config = config.watchdog_config.clone(); let (server_config, client_config) = config.validate(metrics.clone()).await?; let sui_chain_identifier = server_config .sui_client @@ -73,6 +86,19 @@ pub async fn run_bridge_node( .await .expect("Failed to get committee"), ); + let mut handles = vec![]; + + // Start watchdog + let eth_provider = server_config.eth_client.provider(); 
+ let eth_bridge_proxy_address = server_config.eth_bridge_proxy_address; + let sui_client = server_config.sui_client.clone(); + handles.push(spawn_logged_monitored_task!(start_watchdog( + watchdog_config, + &prometheus_registry, + eth_provider, + eth_bridge_proxy_address, + sui_client + ))); // Update voting right metrics // Before reconfiguration happens we only set it once when the node starts @@ -84,19 +110,18 @@ pub async fn run_bridge_node( .await?; // Start Client - let _handles = if let Some(client_config) = client_config { + if let Some(client_config) = client_config { let committee_keys_to_names = Arc::new(get_validator_names_by_pub_keys(&committee, &sui_system).await); - start_client_components( + let client_components = start_client_components( client_config, committee.clone(), committee_keys_to_names, metrics.clone(), ) - .await - } else { - Ok(vec![]) - }?; + .await?; + handles.extend(client_components); + } let committee_name_mapping = get_committee_voting_power_by_name(&committee, &sui_system).await; for (name, voting_power) in committee_name_mapping.into_iter() { @@ -125,6 +150,56 @@ pub async fn run_bridge_node( )) } +async fn start_watchdog( + watchdog_config: Option, + registry: &prometheus::Registry, + eth_provider: Arc>, + eth_bridge_proxy_address: EthAddress, + sui_client: Arc, +) { + let watchdog_metrics = WatchdogMetrics::new(registry); + let (_committee_address, _limiter_address, vault_address, _config_address, weth_address) = + get_eth_contract_addresses(eth_bridge_proxy_address, &eth_provider) + .await + .unwrap_or_else(|e| panic!("get_eth_contract_addresses should not fail: {}", e)); + + let eth_vault_balance = EthVaultBalance::new( + eth_provider.clone(), + vault_address, + weth_address, + watchdog_metrics.eth_vault_balance.clone(), + ); + + let eth_bridge_status = EthBridgeStatus::new( + eth_provider, + eth_bridge_proxy_address, + watchdog_metrics.eth_bridge_paused.clone(), + ); + + let sui_bridge_status = SuiBridgeStatus::new(
sui_client.clone(), + watchdog_metrics.sui_bridge_paused.clone(), + ); + + let mut observables: Vec> = vec![ + Box::new(eth_vault_balance), + Box::new(eth_bridge_status), + Box::new(sui_bridge_status), + ]; + if let Some(watchdog_config) = watchdog_config { + if !watchdog_config.total_supplies.is_empty() { + let total_supplies = TotalSupplies::new( + Arc::new(sui_client.sui_client().clone()), + watchdog_config.total_supplies, + watchdog_metrics.total_supplies.clone(), + ); + observables.push(Box::new(total_supplies)); + } + } + + BridgeWatchDog::new(observables).run().await +} + // TODO: is there a way to clean up the overrides after it's stored in DB? async fn start_client_components( client_config: BridgeClientConfig, @@ -503,6 +578,7 @@ mod tests { db_path: None, metrics_key_pair: default_ed25519_key_pair(), metrics: None, + watchdog_config: None, }; // Spawn bridge node in memory let _handle = run_bridge_node( @@ -569,6 +645,7 @@ mod tests { db_path: Some(db_path), metrics_key_pair: default_ed25519_key_pair(), metrics: None, + watchdog_config: None, }; // Spawn bridge node in memory let _handle = run_bridge_node( @@ -646,6 +723,7 @@ mod tests { db_path: Some(db_path), metrics_key_pair: default_ed25519_key_pair(), metrics: None, + watchdog_config: None, }; // Spawn bridge node in memory let _handle = run_bridge_node( diff --git a/crates/sui-bridge/src/sui_bridge_watchdog/eth_bridge_status.rs b/crates/sui-bridge/src/sui_bridge_watchdog/eth_bridge_status.rs new file mode 100644 index 0000000000000..2df78d137b62c --- /dev/null +++ b/crates/sui-bridge/src/sui_bridge_watchdog/eth_bridge_status.rs @@ -0,0 +1,58 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +//! The EthBridgeStatus observable monitors whether the Eth Bridge is paused. 
+ +use crate::abi::EthSuiBridge; +use crate::metered_eth_provider::MeteredEthHttpProvier; +use crate::sui_bridge_watchdog::Observable; +use async_trait::async_trait; +use ethers::providers::Provider; +use ethers::types::Address as EthAddress; +use prometheus::IntGauge; +use std::sync::Arc; +use tokio::time::Duration; +use tracing::{error, info}; + +pub struct EthBridgeStatus { + bridge_contract: EthSuiBridge>, + metric: IntGauge, +} + +impl EthBridgeStatus { + pub fn new( + provider: Arc>, + bridge_address: EthAddress, + metric: IntGauge, + ) -> Self { + let bridge_contract = EthSuiBridge::new(bridge_address, provider.clone()); + Self { + bridge_contract, + metric, + } + } +} + +#[async_trait] +impl Observable for EthBridgeStatus { + fn name(&self) -> &str { + "EthBridgeStatus" + } + + async fn observe_and_report(&self) { + let status = self.bridge_contract.paused().call().await; + match status { + Ok(status) => { + self.metric.set(status as i64); + info!("Eth Bridge Status: {:?}", status); + } + Err(e) => { + error!("Error getting eth bridge status: {:?}", e); + } + } + } + + fn interval(&self) -> Duration { + Duration::from_secs(10) + } +} diff --git a/crates/sui-bridge/src/sui_bridge_watchdog/eth_vault_balance.rs b/crates/sui-bridge/src/sui_bridge_watchdog/eth_vault_balance.rs new file mode 100644 index 0000000000000..b43b7538067d4 --- /dev/null +++ b/crates/sui-bridge/src/sui_bridge_watchdog/eth_vault_balance.rs @@ -0,0 +1,75 @@ +// Copyright (c) Mysten Labs, Inc. 
+// SPDX-License-Identifier: Apache-2.0 + +use crate::abi::EthERC20; +use crate::metered_eth_provider::MeteredEthHttpProvier; +use crate::sui_bridge_watchdog::Observable; +use async_trait::async_trait; +use ethers::providers::Provider; +use ethers::types::{Address as EthAddress, U256}; +use prometheus::IntGauge; +use std::sync::Arc; +use tokio::time::Duration; +use tracing::{error, info}; + +const TEN_ZEROS: u64 = 10_u64.pow(10); + +pub struct EthVaultBalance { + coin_contract: EthERC20>, + vault_address: EthAddress, + ten_zeros: U256, + metric: IntGauge, +} + +impl EthVaultBalance { + pub fn new( + provider: Arc>, + vault_address: EthAddress, + coin_address: EthAddress, // for now this only supports one coin, which is WETH + metric: IntGauge, + ) -> Self { + let ten_zeros = U256::from(TEN_ZEROS); + let coin_contract = EthERC20::new(coin_address, provider); + Self { + coin_contract, + vault_address, + ten_zeros, + metric, + } + } +} + +#[async_trait] +impl Observable for EthVaultBalance { + fn name(&self) -> &str { + "EthVaultBalance" + } + + async fn observe_and_report(&self) { + match self + .coin_contract + .balance_of(self.vault_address) + .call() + .await + { + Ok(balance) => { + // Why downcasting is safe: + // 1. On Ethereum we only take the first 8 decimals into account, + // meaning the trailing 10 digits can be ignored + // 2. i64::MAX is 9_223_372_036_854_775_807, with 8 decimal places is + // 92_233_720_368. We likely won't see any balance higher than this + // in the next 12 months.
+ let balance = (balance / self.ten_zeros).as_u64() as i64; + self.metric.set(balance); + info!("Eth Vault Balance: {:?}", balance); + } + Err(e) => { + error!("Error getting balance from vault: {:?}", e); + } + } + } + + fn interval(&self) -> Duration { + Duration::from_secs(10) + } +} diff --git a/crates/sui-bridge/src/sui_bridge_watchdog/metrics.rs b/crates/sui-bridge/src/sui_bridge_watchdog/metrics.rs new file mode 100644 index 0000000000000..8fea209d7f43f --- /dev/null +++ b/crates/sui-bridge/src/sui_bridge_watchdog/metrics.rs @@ -0,0 +1,52 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +use prometheus::{ + register_int_gauge_vec_with_registry, register_int_gauge_with_registry, IntGauge, IntGaugeVec, + Registry, +}; + +#[derive(Clone, Debug)] +pub struct WatchdogMetrics { + pub eth_vault_balance: IntGauge, + pub total_supplies: IntGaugeVec, + pub eth_bridge_paused: IntGauge, + pub sui_bridge_paused: IntGauge, +} + +impl WatchdogMetrics { + pub fn new(registry: &Registry) -> Self { + Self { + eth_vault_balance: register_int_gauge_with_registry!( + "bridge_eth_vault_balance", + "Current balance of eth vault", + registry, + ) + .unwrap(), + total_supplies: register_int_gauge_vec_with_registry!( + "bridge_total_supplies", + "Current total supplies of coins on Sui based on Treasury Cap", + &["token_name"], + registry, + ) + .unwrap(), + eth_bridge_paused: register_int_gauge_with_registry!( + "bridge_eth_bridge_paused", + "Whether the eth bridge is paused", + registry, + ) + .unwrap(), + sui_bridge_paused: register_int_gauge_with_registry!( + "bridge_sui_bridge_paused", + "Whether the sui bridge is paused", + registry, + ) + .unwrap(), + } + } + + pub fn new_for_testing() -> Self { + let registry = Registry::new(); + Self::new(&registry) + } +} diff --git a/crates/sui-bridge/src/sui_bridge_watchdog/mod.rs b/crates/sui-bridge/src/sui_bridge_watchdog/mod.rs new file mode 100644 index 0000000000000..63ed7af86990e --- /dev/null +++
b/crates/sui-bridge/src/sui_bridge_watchdog/mod.rs @@ -0,0 +1,62 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +//! The BridgeWatchDog module is responsible for monitoring the health +//! of the bridge by periodically running various observables and +//! reporting the results. + +use anyhow::Result; +use async_trait::async_trait; +use mysten_metrics::spawn_logged_monitored_task; +use tokio::time::Duration; +use tokio::time::MissedTickBehavior; +use tracing::{error_span, info, Instrument}; + +pub mod eth_bridge_status; +pub mod eth_vault_balance; +pub mod metrics; +pub mod sui_bridge_status; +pub mod total_supplies; + +pub struct BridgeWatchDog { + observables: Vec>, +} + +impl BridgeWatchDog { + pub fn new(observables: Vec>) -> Self { + Self { observables } + } + + pub async fn run(self) { + let mut handles = vec![]; + for observable in self.observables.into_iter() { + let handle = spawn_logged_monitored_task!(Self::run_observable(observable)); + handles.push(handle); + } + // Return when any task returns an error or all tasks exit. 
+ futures::future::try_join_all(handles).await.unwrap(); + unreachable!("watch dog tasks should not exit"); + } + + async fn run_observable(observable: Box) -> Result<()> { + let mut interval = tokio::time::interval(observable.interval()); + interval.set_missed_tick_behavior(MissedTickBehavior::Skip); + let name = observable.name(); + let span = error_span!("observable", name); + loop { + info!("Running observable {}", name); + observable + .observe_and_report() + .instrument(span.clone()) + .await; + interval.tick().await; + } + } +} + +#[async_trait] +pub trait Observable { + fn name(&self) -> &str; + async fn observe_and_report(&self); + fn interval(&self) -> Duration; +} diff --git a/crates/sui-bridge/src/sui_bridge_watchdog/sui_bridge_status.rs b/crates/sui-bridge/src/sui_bridge_watchdog/sui_bridge_status.rs new file mode 100644 index 0000000000000..42506286c55e8 --- /dev/null +++ b/crates/sui-bridge/src/sui_bridge_watchdog/sui_bridge_status.rs @@ -0,0 +1,48 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +//! The SuiBridgeStatus observable monitors whether the Sui Bridge is paused. 
+ +use crate::sui_bridge_watchdog::Observable; +use crate::sui_client::SuiBridgeClient; +use async_trait::async_trait; +use prometheus::IntGauge; +use std::sync::Arc; + +use tokio::time::Duration; +use tracing::{error, info}; + +pub struct SuiBridgeStatus { + sui_client: Arc, + metric: IntGauge, +} + +impl SuiBridgeStatus { + pub fn new(sui_client: Arc, metric: IntGauge) -> Self { + Self { sui_client, metric } + } +} + +#[async_trait] +impl Observable for SuiBridgeStatus { + fn name(&self) -> &str { + "SuiBridgeStatus" + } + + async fn observe_and_report(&self) { + let status = self.sui_client.is_bridge_paused().await; + match status { + Ok(status) => { + self.metric.set(status as i64); + info!("Sui Bridge Status: {:?}", status); + } + Err(e) => { + error!("Error getting sui bridge status: {:?}", e); + } + } + } + + fn interval(&self) -> Duration { + Duration::from_secs(10) + } +} diff --git a/crates/sui-bridge/src/sui_bridge_watchdog/total_supplies.rs b/crates/sui-bridge/src/sui_bridge_watchdog/total_supplies.rs new file mode 100644 index 0000000000000..199074a8e1a7a --- /dev/null +++ b/crates/sui-bridge/src/sui_bridge_watchdog/total_supplies.rs @@ -0,0 +1,65 @@ +// Copyright (c) Mysten Labs, Inc. +// SPDX-License-Identifier: Apache-2.0 + +//! The TotalSupplies observable monitors the total supply of each configured coin on Sui.
+ +use crate::sui_bridge_watchdog::Observable; +use async_trait::async_trait; +use prometheus::IntGaugeVec; +use std::{collections::BTreeMap, sync::Arc}; +use sui_sdk::SuiClient; + +use tokio::time::Duration; +use tracing::{error, info}; + +pub struct TotalSupplies { + sui_client: Arc, + coins: BTreeMap, + metric: IntGaugeVec, +} + +impl TotalSupplies { + pub fn new( + sui_client: Arc, + coins: BTreeMap, + metric: IntGaugeVec, + ) -> Self { + Self { + sui_client, + coins, + metric, + } + } +} + +#[async_trait] +impl Observable for TotalSupplies { + fn name(&self) -> &str { + "TotalSupplies" + } + + async fn observe_and_report(&self) { + for (coin_name, coin_type) in &self.coins { + let resp = self + .sui_client + .coin_read_api() + .get_total_supply(coin_type.clone()) + .await; + match resp { + Ok(supply) => { + self.metric + .with_label_values(&[coin_name]) + .set(supply.value as i64); + info!("Total supply for {coin_type}: {}", supply.value); + } + Err(e) => { + error!("Error getting total supply for coin {coin_type}: {:?}", e); + } + } + } + } + + fn interval(&self) -> Duration { + Duration::from_secs(10) + } +} diff --git a/crates/sui-bridge/src/utils.rs b/crates/sui-bridge/src/utils.rs index 17a6b629764fc..d6f7ca487e191 100644 --- a/crates/sui-bridge/src/utils.rs +++ b/crates/sui-bridge/src/utils.rs @@ -5,7 +5,7 @@ use crate::abi::{ EthBridgeCommittee, EthBridgeConfig, EthBridgeLimiter, EthBridgeVault, EthSuiBridge, }; use crate::config::{ - default_ed25519_key_pair, BridgeNodeConfig, EthConfig, MetricsConfig, SuiConfig, + default_ed25519_key_pair, BridgeNodeConfig, EthConfig, MetricsConfig, SuiConfig, WatchdogConfig, }; use crate::crypto::BridgeAuthorityKeyPair; use crate::crypto::BridgeAuthorityPublicKeyBytes; @@ -207,6 +207,13 @@ pub fn generate_bridge_node_config_and_write_to_file( push_interval_seconds: None, // use default value push_url: "metrics_proxy_url".to_string(), }), + watchdog_config: Some(WatchdogConfig { + total_supplies: 
BTreeMap::from_iter(vec![( + "eth".to_string(), + "0xd0e89b2af5e4910726fbcd8b8dd37bb79b29e5f83f7491bca830e94f7f226d29::eth::ETH" + .to_string(), + )]), + }), }; if run_client { config.sui.bridge_client_key_path = Some(PathBuf::from("/path/to/your/bridge_client_key")); diff --git a/crates/sui-e2e-tests/Cargo.toml b/crates/sui-e2e-tests/Cargo.toml index 9d4370817302d..5dd21a3e2820a 100644 --- a/crates/sui-e2e-tests/Cargo.toml +++ b/crates/sui-e2e-tests/Cargo.toml @@ -39,7 +39,6 @@ fastcrypto.workspace = true fastcrypto-zkp.workspace = true move-core-types.workspace = true -sui-bridge.workspace = true sui-core.workspace = true sui-framework.workspace = true sui-json-rpc.workspace = true