From a3ff077bfcd25c1e4860ae4f7b1749181fbdda6f Mon Sep 17 00:00:00 2001 From: WenyXu Date: Fri, 12 Jan 2024 03:41:23 +0000 Subject: [PATCH] fix: print detailed error --- src/common/meta/src/ddl/alter_table.rs | 6 ++--- src/common/meta/src/ddl/utils.rs | 2 +- src/meta-srv/src/error.rs | 9 +++++++ src/meta-srv/src/procedure/region_failover.rs | 7 +++--- .../region_failover/failover_start.rs | 13 +++++----- .../region_failover/update_metadata.rs | 17 ++++++------- .../src/procedure/region_migration.rs | 25 ++++++++++--------- .../downgrade_leader_region.rs | 2 +- .../region_migration/open_candidate_region.rs | 2 +- .../downgrade_leader_region.rs | 15 ++++++----- .../rollback_downgraded_region.rs | 10 ++++---- .../upgrade_candidate_region.rs | 12 ++++----- .../upgrade_candidate_region.rs | 4 +-- src/meta-srv/src/procedure/utils.rs | 2 +- 14 files changed, 67 insertions(+), 59 deletions(-) diff --git a/src/common/meta/src/ddl/alter_table.rs b/src/common/meta/src/ddl/alter_table.rs index d866462db1ac..aa9e505449e8 100644 --- a/src/common/meta/src/ddl/alter_table.rs +++ b/src/common/meta/src/ddl/alter_table.rs @@ -40,7 +40,7 @@ use table::requests::AlterKind; use crate::cache_invalidator::Context; use crate::ddl::utils::handle_operate_region_error; use crate::ddl::DdlContext; -use crate::error::{self, ConvertAlterTableRequestSnafu, InvalidProtoMsgSnafu, Result}; +use crate::error::{self, ConvertAlterTableRequestSnafu, Error, InvalidProtoMsgSnafu, Result}; use crate::key::table_info::TableInfoValue; use crate::key::table_name::TableNameKey; use crate::key::DeserializedValueWithBytes; @@ -374,8 +374,8 @@ impl Procedure for AlterTableProcedure { } async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult { - let error_handler = |e| { - if matches!(e, error::Error::RetryLater { .. }) { + let error_handler = |e: Error| { + if e.is_retry_later() { ProcedureError::retry_later(e) } else { ProcedureError::external(e) diff --git a/src/common/meta/src/ddl/utils.rs b/src/common/meta/src/ddl/utils.rs index edc31b80c4ff..63f669fcd20b 100644 --- a/src/common/meta/src/ddl/utils.rs +++ b/src/common/meta/src/ddl/utils.rs @@ -36,7 +36,7 @@ pub fn handle_operate_region_error(datanode: Peer) -> impl FnOnce(crate::error:: } pub fn handle_retry_error(e: Error) -> ProcedureError { - if matches!(e, error::Error::RetryLater { .. }) { + if e.is_retry_later() { ProcedureError::retry_later(e) } else { ProcedureError::external(e) diff --git a/src/meta-srv/src/error.rs b/src/meta-srv/src/error.rs index 5f185f5f0d10..b5b13c0c7d1b 100644 --- a/src/meta-srv/src/error.rs +++ b/src/meta-srv/src/error.rs @@ -540,6 +540,13 @@ pub enum Error { #[snafu(display("Expected to retry later, reason: {}", reason))] RetryLater { reason: String, location: Location }, + #[snafu(display("Expected to retry later, reason: {}", reason))] + RetryLaterWithSource { + reason: String, + location: Location, + source: BoxedError, + }, + #[snafu(display("Failed to update table metadata, err_msg: {}", err_msg))] UpdateTableMetadata { err_msg: String, location: Location }, @@ -628,6 +635,7 @@ impl Error { /// Returns `true` if the error is retryable. pub fn is_retryable(&self) -> bool { matches!(self, Error::RetryLater { .. }) + || matches!(self, Error::RetryLaterWithSource { .. }) } } @@ -665,6 +673,7 @@ impl ErrorExt for Error { | Error::MailboxTimeout { .. } | Error::MailboxReceiver { .. } | Error::RetryLater { .. } + | Error::RetryLaterWithSource { .. } | Error::StartGrpc { .. } | Error::UpdateTableMetadata { .. } | Error::NoEnoughAvailableDatanode { .. } diff --git a/src/meta-srv/src/procedure/region_failover.rs b/src/meta-srv/src/procedure/region_failover.rs index 0688d575933d..7af5b86a3e5c 100644 --- a/src/meta-srv/src/procedure/region_failover.rs +++ b/src/meta-srv/src/procedure/region_failover.rs @@ -44,7 +44,7 @@ use snafu::ResultExt; use store_api::storage::{RegionId, RegionNumber}; use table::metadata::TableId; -use crate::error::{Error, RegisterProcedureLoaderSnafu, Result, TableMetadataManagerSnafu}; +use crate::error::{RegisterProcedureLoaderSnafu, Result, TableMetadataManagerSnafu}; use crate::lock::DistLockRef; use crate::metasrv::{SelectorContext, SelectorRef}; use crate::service::mailbox::MailboxRef; @@ -357,7 +357,7 @@ impl Procedure for RegionFailoverProcedure { .next(&self.context, &self.node.failed_region) .await .map_err(|e| { - if matches!(e, Error::RetryLater { .. }) { + if e.is_retryable() { ProcedureError::retry_later(e) } else { ProcedureError::external(e) @@ -780,7 +780,8 @@ mod tests { let result = procedure.execute(&ctx).await; assert!(result.is_err()); - assert!(result.unwrap_err().is_retry_later()); + let err = result.unwrap_err(); + assert!(err.is_retry_later(), "err: {:?}", err); assert_eq!( r#"{"region_failover_state":"RegionFailoverStart","failover_candidate":null}"#, serde_json::to_string(&procedure.node.state).unwrap() diff --git a/src/meta-srv/src/procedure/region_failover/failover_start.rs b/src/meta-srv/src/procedure/region_failover/failover_start.rs index 003cd7e4b9c8..aaa1c9949857 100644 --- a/src/meta-srv/src/procedure/region_failover/failover_start.rs +++ b/src/meta-srv/src/procedure/region_failover/failover_start.rs @@ -13,17 +13,17 @@ // limitations under the License. use async_trait::async_trait; -use common_error::ext::ErrorExt; +use common_error::ext::{BoxedError, ErrorExt}; use common_error::status_code::StatusCode; use common_meta::peer::Peer; use common_meta::RegionIdent; use common_telemetry::info; use serde::{Deserialize, Serialize}; -use snafu::ensure; +use snafu::{ensure, location, Location}; use super::deactivate_region::DeactivateRegion; use super::{RegionFailoverContext, State}; -use crate::error::{RegionFailoverCandidatesNotFoundSnafu, Result, RetryLaterSnafu}; +use crate::error::{self, RegionFailoverCandidatesNotFoundSnafu, Result}; use crate::selector::SelectorOptions; #[derive(Serialize, Deserialize, Debug)] @@ -93,10 +93,11 @@ impl State for RegionFailoverStart { .await .map_err(|e| { if e.status_code() == StatusCode::RuntimeResourcesExhausted { - RetryLaterSnafu { - reason: format!("{e}"), + error::Error::RetryLaterWithSource { + reason: format!("Region failover aborted for {failed_region:?}"), + location: location!(), + source: BoxedError::new(e), } - .build() } else { e } diff --git a/src/meta-srv/src/procedure/region_failover/update_metadata.rs b/src/meta-srv/src/procedure/region_failover/update_metadata.rs index 2548f03763f0..542b02ca08e7 100644 --- a/src/meta-srv/src/procedure/region_failover/update_metadata.rs +++ b/src/meta-srv/src/procedure/region_failover/update_metadata.rs @@ -15,6 +15,7 @@ use std::collections::HashMap; use async_trait::async_trait; +use common_error::ext::BoxedError; use common_meta::key::datanode_table::RegionInfo; use common_meta::key::table_route::TableRouteKey; use common_meta::peer::Peer; @@ -27,7 +28,7 @@ use store_api::storage::RegionNumber; use super::invalidate_cache::InvalidateCache; use super::{RegionFailoverContext, State}; -use crate::error::{self, Result, RetryLaterSnafu, TableRouteNotFoundSnafu}; +use crate::error::{self, Result, TableRouteNotFoundSnafu}; use crate::lock::keys::table_metadata_lock_key; use crate::lock::Opts; @@ -172,14 +173,12 @@ impl State for UpdateRegionMetadata { ) -> Result> { self.update_metadata(ctx, failed_region) .await - .map_err(|e| { - RetryLaterSnafu { - reason: format!( - "Failed to update metadata for failed region: {}, error: {}", - failed_region, e - ), - } - .build() + .map_err(BoxedError::new) + .context(error::RetryLaterWithSourceSnafu { + reason: format!( + "Failed to update metadata for failed region: {}", + failed_region + ), })?; Ok(Box::new(InvalidateCache)) } diff --git a/src/meta-srv/src/procedure/region_migration.rs b/src/meta-srv/src/procedure/region_migration.rs index ff5d0ecc45f7..d1c73597b79a 100644 --- a/src/meta-srv/src/procedure/region_migration.rs +++ b/src/meta-srv/src/procedure/region_migration.rs @@ -30,6 +30,7 @@ use std::fmt::Debug; use std::time::Duration; use api::v1::meta::MailboxMessage; +use common_error::ext::BoxedError; use common_meta::instruction::Instruction; use common_meta::key::datanode_table::{DatanodeTableKey, DatanodeTableValue}; use common_meta::key::table_info::TableInfoValue; @@ -45,12 +46,12 @@ use common_procedure::error::{ use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status, StringKey}; pub use manager::RegionMigrationProcedureTask; use serde::{Deserialize, Serialize}; -use snafu::{location, Location, OptionExt, ResultExt}; +use snafu::{OptionExt, ResultExt}; use store_api::storage::RegionId; use tokio::time::Instant; use self::migration_start::RegionMigrationStart; -use crate::error::{self, Error, Result}; +use crate::error::{self, Result}; use crate::service::mailbox::{BroadcastChannel, MailboxRef}; /// It's shared in each step and available even after recovering. @@ -220,9 +221,9 @@ impl Context { .get(table_id) .await .context(error::TableMetadataManagerSnafu) - .map_err(|e| error::Error::RetryLater { - reason: e.to_string(), - location: location!(), + .map_err(BoxedError::new) + .context(error::RetryLaterWithSourceSnafu { + reason: format!("Failed to get TableRoute: {table_id}"), })? .context(error::TableRouteNotFoundSnafu { table_id })?; @@ -256,9 +257,9 @@ impl Context { .get(table_id) .await .context(error::TableMetadataManagerSnafu) - .map_err(|e| error::Error::RetryLater { - reason: e.to_string(), - location: location!(), + .map_err(BoxedError::new) + .context(error::RetryLaterWithSourceSnafu { + reason: format!("Failed to get TableInfo: {table_id}"), })? .context(error::TableInfoNotFoundSnafu { table_id })?; @@ -289,9 +290,9 @@ impl Context { }) .await .context(error::TableMetadataManagerSnafu) - .map_err(|e| error::Error::RetryLater { - reason: e.to_string(), - location: location!(), + .map_err(BoxedError::new) + .context(error::RetryLaterWithSourceSnafu { + reason: format!("Failed to get DatanodeTable: ({datanode_id},{table_id})"), })? .context(error::DatanodeTableNotFoundSnafu { table_id, @@ -412,7 +413,7 @@ impl Procedure for RegionMigrationProcedure { let state = &mut self.state; let (next, status) = state.next(&mut self.context).await.map_err(|e| { - if matches!(e, Error::RetryLater { .. }) { + if e.is_retryable() { ProcedureError::retry_later(e) } else { ProcedureError::external(e) diff --git a/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs b/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs index 73ded293657e..8d1c439da46e 100644 --- a/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs +++ b/src/meta-srv/src/procedure/region_migration/downgrade_leader_region.rs @@ -374,7 +374,7 @@ mod tests { assert_matches!(err, Error::RetryLater { .. }); assert!(err.is_retryable()); - assert!(err.to_string().contains("test mocked")); + assert!(format!("{err:?}").contains("test mocked"), "err: {err:?}",); } #[tokio::test] diff --git a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs index 0335a447c7e7..3ef485c09a6b 100644 --- a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs @@ -383,7 +383,7 @@ mod tests { assert_matches!(err, Error::RetryLater { .. }); assert!(err.is_retryable()); - assert!(err.to_string().contains("test mocked")); + assert!(format!("{err:?}").contains("test mocked")); } #[tokio::test] diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata/downgrade_leader_region.rs b/src/meta-srv/src/procedure/region_migration/update_metadata/downgrade_leader_region.rs index f764a424e864..e018053407a6 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata/downgrade_leader_region.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata/downgrade_leader_region.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_error::ext::BoxedError; use common_meta::rpc::router::RegionStatus; use snafu::ResultExt; @@ -61,9 +62,11 @@ impl UpdateMetadata { .context(error::TableMetadataManagerSnafu) { ctx.remove_table_route_value(); - return error::RetryLaterSnafu { - reason: format!("Failed to update the table route during the downgrading leader region, error: {err}") - }.fail(); + return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu { + reason: format!( + "Failed to update the table route during the downgrading leader region, region_id: {region_id}, from_peer_id: {from_peer_id}" + ), + }); } ctx.remove_table_route_value(); @@ -163,13 +166,9 @@ mod tests { ctx.volatile_ctx.table_route = Some(original_table_route); let err = state.downgrade_leader_region(&mut ctx).await.unwrap_err(); - assert!(ctx.volatile_ctx.table_route.is_none()); - - assert_matches!(err, Error::RetryLater { .. }); - assert!(err.is_retryable()); - assert!(err.to_string().contains("Failed to update the table route")); + assert!(format!("{err:?}").contains("Failed to update the table route")); } #[tokio::test] diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata/rollback_downgraded_region.rs b/src/meta-srv/src/procedure/region_migration/update_metadata/rollback_downgraded_region.rs index 6d066a1b863f..81e73e270af7 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata/rollback_downgraded_region.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata/rollback_downgraded_region.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_error::ext::BoxedError; use snafu::ResultExt; use crate::error::{self, Result}; @@ -45,9 +46,9 @@ impl UpdateMetadata { .context(error::TableMetadataManagerSnafu) { ctx.remove_table_route_value(); - return error::RetryLaterSnafu { - reason: format!("Failed to update the table route during the rollback downgraded leader region, error: {err}") - }.fail(); + return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu { + reason: format!("Failed to update the table route during the rollback downgraded leader region: {region_id}"), + }); } ctx.remove_table_route_value(); @@ -157,9 +158,8 @@ mod tests { .await .unwrap_err(); assert!(ctx.volatile_ctx.table_route.is_none()); - assert_matches!(err, Error::RetryLater { .. }); assert!(err.is_retryable()); - assert!(err.to_string().contains("Failed to update the table route")); + assert!(format!("{err:?}").contains("Failed to update the table route")); state.rollback_downgraded_region(&mut ctx).await.unwrap(); diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs index e69d0cd714e1..62d7f92320fe 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use common_error::ext::BoxedError; use common_meta::key::datanode_table::RegionInfo; use common_meta::rpc::router::{region_distribution, RegionRoute}; use common_telemetry::{info, warn}; @@ -168,9 +169,9 @@ impl UpdateMetadata { .context(error::TableMetadataManagerSnafu) { ctx.remove_table_route_value(); - return error::RetryLaterSnafu { - reason: format!("Failed to update the table route during the upgrading candidate region, error: {err}") - }.fail(); + return Err(BoxedError::new(err)).context(error::RetryLaterWithSourceSnafu { + reason: format!("Failed to update the table route during the upgrading candidate region: {region_id}"), + }); }; ctx.remove_table_route_value(); @@ -354,15 +355,12 @@ mod tests { .register(2, RegionId::new(table_id, 1)) .unwrap(); ctx.volatile_ctx.opening_region_guard = Some(guard); - let err = state.upgrade_candidate_region(&mut ctx).await.unwrap_err(); assert!(ctx.volatile_ctx.table_route.is_none()); assert!(ctx.volatile_ctx.opening_region_guard.is_some()); - assert_matches!(err, Error::RetryLater { .. }); - assert!(err.is_retryable()); - assert!(err.to_string().contains("Failed to update the table route")); + assert!(format!("{err:?}").contains("Failed to update the table route")); } #[tokio::test] diff --git a/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs index 514be461a8f0..bf1a3e6b1f20 100644 --- a/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs @@ -336,7 +336,7 @@ mod tests { assert_matches!(err, Error::RetryLater { .. }); assert!(err.is_retryable()); - assert!(err.to_string().contains("test mocked")); + assert!(format!("{err:?}").contains("test mocked")); } #[tokio::test] @@ -398,7 +398,7 @@ mod tests { assert_matches!(err, Error::RetryLater { .. }); assert!(err.is_retryable()); - assert!(err.to_string().contains("still replaying the wal")); + assert!(format!("{err:?}").contains("still replaying the wal")); // Sets the `require_ready` to false. state.require_ready = false; diff --git a/src/meta-srv/src/procedure/utils.rs b/src/meta-srv/src/procedure/utils.rs index 8cf2a34c612d..094ab4d14c0e 100644 --- a/src/meta-srv/src/procedure/utils.rs +++ b/src/meta-srv/src/procedure/utils.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#[cfg(feature = "mock")] +#[cfg(any(test, feature = "mock"))] pub mod mock { use std::io::Error; use std::sync::Arc;