From 3582826608acc20f75845122e66580ed663617d5 Mon Sep 17 00:00:00 2001 From: Igor Date: Thu, 12 Dec 2024 12:26:54 -0800 Subject: [PATCH] [rest_api][aptos_vm] Prevent running move code on too stale of a state Running code on very stale state (i.e. before node was able to state-sync on startup), leads to confusing outcomes, and also excercises paths that should otherwise never happen. For example - new prologue functions have been introduced, and VM expects them to exist, but genesis framework doesn't have them. --- Cargo.lock | 1 + api/src/basic.rs | 18 +++--------------- api/src/context.rs | 23 ++++++++++++++++++++++- api/src/transactions.rs | 11 +++++++++++ api/src/view_function.rs | 10 ++++++++++ api/types/Cargo.toml | 1 + api/types/src/ledger_info.rs | 10 ++++++++++ config/src/config/api_config.rs | 8 ++++++++ 8 files changed, 66 insertions(+), 16 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca3e47878f1d8..0b9f877172e73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -553,6 +553,7 @@ dependencies = [ "aptos-config", "aptos-crypto", "aptos-framework", + "aptos-infallible", "aptos-logger", "aptos-openapi", "aptos-resource-viewer", diff --git a/api/src/basic.rs b/api/src/basic.rs index ae6fd8fb5c3b3..49ff470c229c3 100644 --- a/api/src/basic.rs +++ b/api/src/basic.rs @@ -143,22 +143,10 @@ impl BasicApi { // If we have a duration, check that it's close to the current time, otherwise it's ok if let Some(max_skew) = duration_secs.0 { - let ledger_timestamp = Duration::from_micros(ledger_info.timestamp()); - let skew_threshold = SystemTime::now() - .sub(Duration::from_secs(max_skew as u64)) - .duration_since(UNIX_EPOCH) - .context("Failed to determine absolute unix time based on given duration") - .map_err(|err| { - HealthCheckError::internal_with_code( - err, - AptosErrorCode::InternalError, - &ledger_info, - ) - })?; - - if ledger_timestamp < skew_threshold { + let duration_since_onchain_timestamp = ledger_info.duration_since_timestamp(); + if duration_since_onchain_timestamp > Duration::from_secs(max_skew as u64) { return Err(HealthCheckError::service_unavailable_with_code( - format!("The latest ledger info timestamp is {:?}, which is beyond the allowed skew ({}s).", ledger_timestamp, max_skew), + format!("The latest ledger info timestamp is {:?} old, which is beyond the allowed skew ({}s).", duration_since_onchain_timestamp, max_skew), AptosErrorCode::HealthCheckFailed, &ledger_info, )); diff --git a/api/src/context.rs b/api/src/context.rs index bdc96fa74e0a1..c36e73338cbc0 100644 --- a/api/src/context.rs +++ b/api/src/context.rs @@ -64,7 +64,7 @@ use std::{ atomic::{AtomicU64, AtomicUsize, Ordering}, Arc, RwLock, RwLockWriteGuard, }, - time::Instant, + time::{Duration, Instant}, }; // Context holds application scope context @@ -1554,6 +1554,27 @@ impl Context { pub fn simulate_txn_stats(&self) -> &FunctionStats { &self.simulate_txn_stats } + + pub fn validate_ledger_for_move_code_execution( + &self, + ledger_info: &LedgerInfo, + ) -> Result<(), String> { + let max_stale = Duration::from_secs( + self.node_config + .api + .max_stale_state_to_execute_move_code_on_s, + ); + let duration_since_onchain_timestamp = ledger_info.duration_since_timestamp(); + if max_stale < duration_since_onchain_timestamp { + Err(format!( + "Node is too stale ({}s > {}s limit) to be able to execute move code", + duration_since_onchain_timestamp.as_secs(), + max_stale.as_secs(), + )) + } else { + Ok(()) + } + } } pub struct GasScheduleCache { diff --git a/api/src/transactions.rs b/api/src/transactions.rs index f5cf952c967b5..2e53d1b28a51b 100644 --- a/api/src/transactions.rs +++ b/api/src/transactions.rs @@ -509,6 +509,17 @@ impl TransactionsApi { let context = self.context.clone(); api_spawn_blocking(move || { let ledger_info = context.get_latest_ledger_info()?; + + context + .validate_ledger_for_move_code_execution(&ledger_info) + .map_err(|message| { + SubmitTransactionError::forbidden_with_code( + message, + AptosErrorCode::InternalError, + &ledger_info, + ) + })?; + let mut signed_transaction = api.get_signed_transaction(&ledger_info, data)?; // Confirm the simulation filter allows the transaction. We use HashValue::zero() diff --git a/api/src/view_function.rs b/api/src/view_function.rs index fa6b31e4f732b..0ad75bc80d859 100644 --- a/api/src/view_function.rs +++ b/api/src/view_function.rs @@ -83,6 +83,16 @@ fn view_request( let (ledger_info, requested_version) = context .get_latest_ledger_info_and_verify_lookup_version(ledger_version.map(|inner| inner.0))?; + context + .validate_ledger_for_move_code_execution(&ledger_info) + .map_err(|message| { + BasicErrorWith404::forbidden_with_code( + message, + AptosErrorCode::InternalError, + &ledger_info, + ) + })?; + let state_view = context .state_view_at_version(requested_version) .map_err(|err| { diff --git a/api/types/Cargo.toml b/api/types/Cargo.toml index 4f395c7457dd9..f3904b843062f 100644 --- a/api/types/Cargo.toml +++ b/api/types/Cargo.toml @@ -17,6 +17,7 @@ anyhow = { workspace = true } aptos-config = { workspace = true } aptos-crypto = { workspace = true } aptos-framework = { workspace = true } +aptos-infallible = { workspace = true } aptos-logger = { workspace = true } aptos-openapi = { workspace = true } aptos-resource-viewer = { workspace = true } diff --git a/api/types/src/ledger_info.rs b/api/types/src/ledger_info.rs index 97438ae104013..336386a92d447 100644 --- a/api/types/src/ledger_info.rs +++ b/api/types/src/ledger_info.rs @@ -6,6 +6,7 @@ use crate::U64; use aptos_types::{chain_id::ChainId, ledger_info::LedgerInfoWithSignatures}; use poem_openapi::Object as PoemObject; use serde::{Deserialize, Serialize}; +use std::time::Duration; /// The Ledger information representing the current state of the chain #[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, PoemObject)] @@ -75,4 +76,13 @@ impl LedgerInfo { pub fn timestamp(&self) -> u64 { self.ledger_timestamp.into() } + + pub fn timestamp_duration(&self) -> Duration { + Duration::from_micros(self.ledger_timestamp.into()) + } + + pub fn duration_since_timestamp(&self) -> Duration { + let now = aptos_infallible::duration_since_epoch(); + now.saturating_sub(self.timestamp_duration()) + } } diff --git a/config/src/config/api_config.rs b/config/src/config/api_config.rs index 1758d8c227994..22ec6293608a0 100644 --- a/config/src/config/api_config.rs +++ b/config/src/config/api_config.rs @@ -81,6 +81,12 @@ pub struct ApiConfig { pub simulation_filter: Filter, /// Configuration to filter view function requests. pub view_filter: ViewFilter, + /// Maximum time state (onchain timestamp) is allowed to be stale, for the node + /// to respond to requests that require executing move code. + /// Preventing running code on extremely stale state avoids triggering code paths + /// that should otherwise never be triggered. + /// This is on top of above `simulation_filter` and `view_filter`. + pub max_stale_state_to_execute_move_code_on_s: u64, /// Periodically log stats for view function and simulate transaction usage pub periodic_function_stats_sec: Option, /// The time wait_by_hash will wait before returning 404. @@ -141,6 +147,8 @@ impl Default for ApiConfig { wait_by_hash_timeout_ms: 1_000, wait_by_hash_poll_interval_ms: 20, wait_by_hash_max_active_connections: 100, + // by default, don't execute move code, if state is more than 1 day stale + max_stale_state_to_execute_move_code_on_s: 24 * 3600, } } }