diff --git a/crates/mysten-metrics/src/lib.rs b/crates/mysten-metrics/src/lib.rs
index 3fb40de20573a..1f07955b52e24 100644
--- a/crates/mysten-metrics/src/lib.rs
+++ b/crates/mysten-metrics/src/lib.rs
@@ -15,8 +15,9 @@ use std::time::Instant;
 
 use once_cell::sync::OnceCell;
 use prometheus::{
-    register_histogram_with_registry, register_int_gauge_vec_with_registry, Histogram, IntGaugeVec,
-    Registry, TextEncoder,
+    register_histogram_with_registry, register_int_counter_with_registry,
+    register_int_gauge_vec_with_registry, Histogram, IntCounter, IntGaugeVec, Registry,
+    TextEncoder,
 };
 use tap::TapFallible;
 use tracing::{warn, Span};
@@ -69,6 +70,7 @@ pub struct Metrics {
     pub scope_duration_ns: IntGaugeVec,
     pub scope_entrance: IntGaugeVec,
     pub thread_stall_duration_sec: Histogram,
+    pub system_invariant_violations: IntCounter,
 }
 
 impl Metrics {
@@ -143,6 +145,12 @@ impl Metrics {
                 registry,
             )
             .unwrap(),
+            system_invariant_violations: register_int_counter_with_registry!(
+                "system_invariant_violations",
+                "Number of system invariant violations",
+                registry,
+            )
+            .unwrap(),
         }
     }
 }
@@ -630,6 +638,28 @@ pub async fn metrics(
     }
 }
 
+/// Reports a violated system invariant.
+///
+/// When `debug_assertions` are enabled this panics with the given message. Otherwise it
+/// increments the `system_invariant_violations` counter (when `get_metrics()` returns
+/// `Some`) and logs the message at error level, so execution continues.
+///
+/// `$msg` must implement `Display`. The calling crate must depend on `tracing`.
+#[macro_export]
+macro_rules! system_invariant_violation {
+    ($msg:expr) => {{
+        if cfg!(debug_assertions) {
+            panic!("System Invariant Violation: {}", $msg);
+        } else {
+            if let Some(metrics) = $crate::get_metrics() {
+                metrics.system_invariant_violations.inc();
+            }
+            // Fully qualified so callers need not have `error!` imported.
+            ::tracing::error!("System Invariant Violation: {}", $msg);
+        }
+    }};
+}
+
 #[cfg(test)]
 mod tests {
     use crate::RegistryService;
diff --git a/crates/sui-core/src/authority.rs b/crates/sui-core/src/authority.rs
index 9a7b6c341e524..4bb324e21b7ec 100644
--- a/crates/sui-core/src/authority.rs
+++ b/crates/sui-core/src/authority.rs
@@ -21,7 +21,7 @@ use move_binary_format::binary_config::BinaryConfig;
 use move_binary_format::CompiledModule;
 use move_core_types::annotated_value::MoveStructLayout;
 use move_core_types::language_storage::ModuleId;
-use mysten_metrics::{TX_TYPE_SHARED_OBJ_TX, TX_TYPE_SINGLE_WRITER_TX};
+use mysten_metrics::{system_invariant_violation, TX_TYPE_SHARED_OBJ_TX, TX_TYPE_SINGLE_WRITER_TX};
 use parking_lot::Mutex;
 use prometheus::{
     register_histogram_vec_with_registry, register_histogram_with_registry,
@@ -303,8 +303,6 @@ pub struct AuthorityMetrics {
     /// bytecode verifier metrics for tracking timeouts
     pub bytecode_verifier_metrics: Arc<BytecodeVerifierMetrics>,
 
-    pub authenticator_state_update_failed: IntCounter,
-
     /// Count of zklogin signatures
     pub zklogin_sig_count: IntCounter,
     /// Count of multisig signatures
@@ -736,12 +734,6 @@ impl AuthorityMetrics {
             ).unwrap(),
             limits_metrics: Arc::new(LimitsMetrics::new(registry)),
             bytecode_verifier_metrics: Arc::new(BytecodeVerifierMetrics::new(registry)),
-            authenticator_state_update_failed: register_int_counter_with_registry!(
-                "authenticator_state_update_failed",
-                "Number of failed authenticator state updates",
-                registry,
-            )
-            .unwrap(),
             zklogin_sig_count: register_int_counter_with_registry!(
                 "zklogin_sig_count",
                 "Count of zkLogin signatures",
@@ -1511,10 +1503,11 @@ impl AuthorityState {
             certificate.data().transaction_data().kind()
         {
             if let Some(err) = &execution_error_opt {
-                error!("Authenticator state update failed: {err}");
-                self.metrics.authenticator_state_update_failed.inc();
+                system_invariant_violation!(format!(
+                    "Authenticator state update failed: {:?}",
+                    err
+                ));
             }
-            debug_assert!(execution_error_opt.is_none());
             epoch_store.update_authenticator_state(auth_state);
 
             // double check that the signature verifier always matches the authenticator state