-
Notifications
You must be signed in to change notification settings - Fork 333
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix: ensure table route metadata is eventually rolled back on failure (…
…#5174) * fix: ensure table route metadata is eventually rolled back on procedure failure * fix(fuzz): enhance procedure condition checking * chore: add logs * feat: close downgraded leader region actively * chore: apply suggestions from CR
- Loading branch information
1 parent
4bdc704
commit dad31aa
Showing
10 changed files
with
274 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
138 changes: 138 additions & 0 deletions
138
src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
// Copyright 2023 Greptime Team | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
use std::any::Any; | ||
use std::time::Duration; | ||
|
||
use api::v1::meta::MailboxMessage; | ||
use common_meta::distributed_time_constants::MAILBOX_RTT_SECS; | ||
use common_meta::instruction::{Instruction, InstructionReply, SimpleReply}; | ||
use common_meta::key::datanode_table::RegionInfo; | ||
use common_meta::RegionIdent; | ||
use common_procedure::Status; | ||
use common_telemetry::{info, warn}; | ||
use serde::{Deserialize, Serialize}; | ||
use snafu::ResultExt; | ||
|
||
use crate::error::{self, Result}; | ||
use crate::handler::HeartbeatMailbox; | ||
use crate::procedure::region_migration::migration_end::RegionMigrationEnd; | ||
use crate::procedure::region_migration::{Context, State}; | ||
use crate::service::mailbox::Channel; | ||
|
||
const CLOSE_DOWNGRADED_REGION_TIMEOUT: Duration = Duration::from_secs(MAILBOX_RTT_SECS); | ||
|
||
#[derive(Debug, Serialize, Deserialize)] | ||
pub struct CloseDowngradedRegion; | ||
|
||
#[async_trait::async_trait] | ||
#[typetag::serde] | ||
impl State for CloseDowngradedRegion { | ||
async fn next(&mut self, ctx: &mut Context) -> Result<(Box<dyn State>, Status)> { | ||
if let Err(err) = self.close_downgraded_leader_region(ctx).await { | ||
let downgrade_leader_datanode = &ctx.persistent_ctx.from_peer; | ||
let region_id = ctx.region_id(); | ||
warn!(err; "Failed to close downgraded leader region: {region_id} on datanode {:?}", downgrade_leader_datanode); | ||
} | ||
|
||
Ok((Box::new(RegionMigrationEnd), Status::done())) | ||
} | ||
|
||
fn as_any(&self) -> &dyn Any { | ||
self | ||
} | ||
} | ||
|
||
impl CloseDowngradedRegion { | ||
/// Builds close region instruction. | ||
/// | ||
/// Abort(non-retry): | ||
/// - Datanode Table is not found. | ||
async fn build_close_region_instruction(&self, ctx: &mut Context) -> Result<Instruction> { | ||
let pc = &ctx.persistent_ctx; | ||
let downgrade_leader_datanode_id = pc.from_peer.id; | ||
let cluster_id = pc.cluster_id; | ||
let table_id = pc.region_id.table_id(); | ||
let region_number = pc.region_id.region_number(); | ||
let datanode_table_value = ctx.get_from_peer_datanode_table_value().await?; | ||
|
||
let RegionInfo { engine, .. } = datanode_table_value.region_info.clone(); | ||
|
||
Ok(Instruction::CloseRegion(RegionIdent { | ||
cluster_id, | ||
datanode_id: downgrade_leader_datanode_id, | ||
table_id, | ||
region_number, | ||
engine, | ||
})) | ||
} | ||
|
||
/// Closes the downgraded leader region. | ||
async fn close_downgraded_leader_region(&self, ctx: &mut Context) -> Result<()> { | ||
let close_instruction = self.build_close_region_instruction(ctx).await?; | ||
let region_id = ctx.region_id(); | ||
let pc = &ctx.persistent_ctx; | ||
let downgrade_leader_datanode = &pc.from_peer; | ||
let msg = MailboxMessage::json_message( | ||
&format!("Close downgraded region: {}", region_id), | ||
&format!("Meta@{}", ctx.server_addr()), | ||
&format!( | ||
"Datanode-{}@{}", | ||
downgrade_leader_datanode.id, downgrade_leader_datanode.addr | ||
), | ||
common_time::util::current_time_millis(), | ||
&close_instruction, | ||
) | ||
.with_context(|_| error::SerializeToJsonSnafu { | ||
input: close_instruction.to_string(), | ||
})?; | ||
|
||
let ch = Channel::Datanode(downgrade_leader_datanode.id); | ||
let receiver = ctx | ||
.mailbox | ||
.send(&ch, msg, CLOSE_DOWNGRADED_REGION_TIMEOUT) | ||
.await?; | ||
|
||
match receiver.await? { | ||
Ok(msg) => { | ||
let reply = HeartbeatMailbox::json_reply(&msg)?; | ||
info!( | ||
"Received close downgraded leade region reply: {:?}, region: {}", | ||
reply, region_id | ||
); | ||
let InstructionReply::CloseRegion(SimpleReply { result, error }) = reply else { | ||
return error::UnexpectedInstructionReplySnafu { | ||
mailbox_message: msg.to_string(), | ||
reason: "expect close region reply", | ||
} | ||
.fail(); | ||
}; | ||
|
||
if result { | ||
Ok(()) | ||
} else { | ||
error::UnexpectedSnafu { | ||
violated: format!( | ||
"Failed to close downgraded leader region: {region_id} on datanode {:?}, error: {error:?}", | ||
downgrade_leader_datanode, | ||
), | ||
} | ||
.fail() | ||
} | ||
} | ||
|
||
Err(e) => Err(e), | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.