Skip to content

Commit

Permalink
[rest_api][aptos_vm] Prevent running move code on too stale of a state
Browse files Browse the repository at this point in the history
Running code on very stale state (i.e. before node was able to state-sync on startup),
leads to confusing outcomes, and also excercises paths that should otherwise never happen.

For example - new prologue functions have been introduced, and VM expects them to exist,
but genesis framework doesn't have them.
  • Loading branch information
igor-aptos committed Dec 12, 2024
1 parent 3c6e693 commit 3582826
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 16 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 3 additions & 15 deletions api/src/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,22 +143,10 @@ impl BasicApi {

// If we have a duration, check that it's close to the current time, otherwise it's ok
if let Some(max_skew) = duration_secs.0 {
let ledger_timestamp = Duration::from_micros(ledger_info.timestamp());
let skew_threshold = SystemTime::now()
.sub(Duration::from_secs(max_skew as u64))
.duration_since(UNIX_EPOCH)
.context("Failed to determine absolute unix time based on given duration")
.map_err(|err| {
HealthCheckError::internal_with_code(
err,
AptosErrorCode::InternalError,
&ledger_info,
)
})?;

if ledger_timestamp < skew_threshold {
let duration_since_onchain_timestamp = ledger_info.duration_since_timestamp();
if duration_since_onchain_timestamp > Duration::from_secs(max_skew as u64) {
return Err(HealthCheckError::service_unavailable_with_code(
format!("The latest ledger info timestamp is {:?}, which is beyond the allowed skew ({}s).", ledger_timestamp, max_skew),
format!("The latest ledger info timestamp is {:?} old, which is beyond the allowed skew ({}s).", duration_since_onchain_timestamp, max_skew),
AptosErrorCode::HealthCheckFailed,
&ledger_info,
));
Expand Down
23 changes: 22 additions & 1 deletion api/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ use std::{
atomic::{AtomicU64, AtomicUsize, Ordering},
Arc, RwLock, RwLockWriteGuard,
},
time::Instant,
time::{Duration, Instant},
};

// Context holds application scope context
Expand Down Expand Up @@ -1554,6 +1554,27 @@ impl Context {
pub fn simulate_txn_stats(&self) -> &FunctionStats {
&self.simulate_txn_stats
}

pub fn validate_ledger_for_move_code_execution(
&self,
ledger_info: &LedgerInfo,
) -> Result<(), String> {
let max_stale = Duration::from_secs(
self.node_config
.api
.max_stale_state_to_execute_move_code_on_s,
);
let duration_since_onchain_timestamp = ledger_info.duration_since_timestamp();
if max_stale < duration_since_onchain_timestamp {
Err(format!(
"Node is too stale ({}s > {}s limit) to be able to execute move code",
duration_since_onchain_timestamp.as_secs(),
max_stale.as_secs(),
))
} else {
Ok(())
}
}
}

pub struct GasScheduleCache {
Expand Down
11 changes: 11 additions & 0 deletions api/src/transactions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,17 @@ impl TransactionsApi {
let context = self.context.clone();
api_spawn_blocking(move || {
let ledger_info = context.get_latest_ledger_info()?;

context
.validate_ledger_for_move_code_execution(&ledger_info)
.map_err(|message| {
SubmitTransactionError::forbidden_with_code(
message,
AptosErrorCode::InternalError,
&ledger_info,
)
})?;

let mut signed_transaction = api.get_signed_transaction(&ledger_info, data)?;

// Confirm the simulation filter allows the transaction. We use HashValue::zero()
Expand Down
10 changes: 10 additions & 0 deletions api/src/view_function.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,16 @@ fn view_request(
let (ledger_info, requested_version) = context
.get_latest_ledger_info_and_verify_lookup_version(ledger_version.map(|inner| inner.0))?;

context
.validate_ledger_for_move_code_execution(&ledger_info)
.map_err(|message| {
BasicErrorWith404::forbidden_with_code(
message,
AptosErrorCode::InternalError,
&ledger_info,
)
})?;

let state_view = context
.state_view_at_version(requested_version)
.map_err(|err| {
Expand Down
1 change: 1 addition & 0 deletions api/types/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ anyhow = { workspace = true }
aptos-config = { workspace = true }
aptos-crypto = { workspace = true }
aptos-framework = { workspace = true }
aptos-infallible = { workspace = true }
aptos-logger = { workspace = true }
aptos-openapi = { workspace = true }
aptos-resource-viewer = { workspace = true }
Expand Down
10 changes: 10 additions & 0 deletions api/types/src/ledger_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use crate::U64;
use aptos_types::{chain_id::ChainId, ledger_info::LedgerInfoWithSignatures};
use poem_openapi::Object as PoemObject;
use serde::{Deserialize, Serialize};
use std::time::Duration;

/// The Ledger information representing the current state of the chain
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq, PoemObject)]
Expand Down Expand Up @@ -75,4 +76,13 @@ impl LedgerInfo {
pub fn timestamp(&self) -> u64 {
self.ledger_timestamp.into()
}

pub fn timestamp_duration(&self) -> Duration {
Duration::from_micros(self.ledger_timestamp.into())
}

pub fn duration_since_timestamp(&self) -> Duration {
let now = aptos_infallible::duration_since_epoch();
now.saturating_sub(self.timestamp_duration())
}
}
8 changes: 8 additions & 0 deletions config/src/config/api_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ pub struct ApiConfig {
pub simulation_filter: Filter,
/// Configuration to filter view function requests.
pub view_filter: ViewFilter,
/// Maximum time state (onchain timestamp) is allowed to be stale, for the node
/// to respond to requests that require executing move code.
/// Preventing running code on extremely stale state avoids triggering code paths
/// that should otherwise never be triggered.
/// This is on top of above `simulation_filter` and `view_filter`.
pub max_stale_state_to_execute_move_code_on_s: u64,
/// Periodically log stats for view function and simulate transaction usage
pub periodic_function_stats_sec: Option<u64>,
/// The time wait_by_hash will wait before returning 404.
Expand Down Expand Up @@ -141,6 +147,8 @@ impl Default for ApiConfig {
wait_by_hash_timeout_ms: 1_000,
wait_by_hash_poll_interval_ms: 20,
wait_by_hash_max_active_connections: 100,
// by default, don't execute move code, if state is more than 1 day stale
max_stale_state_to_execute_move_code_on_s: 24 * 3600,
}
}
}
Expand Down

0 comments on commit 3582826

Please sign in to comment.