diff --git a/src/datanode/src/heartbeat.rs b/src/datanode/src/heartbeat.rs index 6c2bca43994f..04e9d9ac5b6f 100644 --- a/src/datanode/src/heartbeat.rs +++ b/src/datanode/src/heartbeat.rs @@ -37,7 +37,7 @@ use crate::alive_keeper::RegionAliveKeeper; use crate::config::DatanodeOptions; use crate::error::{self, MetaClientInitSnafu, Result}; use crate::event_listener::RegionServerEventReceiver; -use crate::metrics; +use crate::metrics::{self, HEARTBEAT_RECV_COUNT, HEARTBEAT_SENT_COUNT}; use crate::region_server::RegionServer; pub(crate) mod handler; @@ -231,10 +231,12 @@ impl HeartbeatTask { mailbox_message: Some(message), ..Default::default() }; + HEARTBEAT_RECV_COUNT.with_label_values(&["success"]).inc(); Some(req) } Err(e) => { error!(e; "Failed to encode mailbox messages!"); + HEARTBEAT_RECV_COUNT.with_label_values(&["error"]).inc(); None } } @@ -304,6 +306,8 @@ impl HeartbeatTask { error!(e; "Failed to reconnect to metasrv!"); } } + } else { + HEARTBEAT_SENT_COUNT.inc(); } } } diff --git a/src/datanode/src/metrics.rs b/src/datanode/src/metrics.rs index 1c660a315c07..d11e8af9fe35 100644 --- a/src/datanode/src/metrics.rs +++ b/src/datanode/src/metrics.rs @@ -54,4 +54,17 @@ lazy_static! { &[REGION_ROLE] ) .unwrap(); + /// The number of heartbeats sent by datanode. + pub static ref HEARTBEAT_SENT_COUNT: IntCounter = register_int_counter!( + "greptime_datanode_heartbeat_send_count", + "datanode heartbeat sent", + ) + .unwrap(); + /// The number of heartbeats received by datanode, labeled with result type. 
+ pub static ref HEARTBEAT_RECV_COUNT: IntCounterVec = register_int_counter_vec!( + "greptime_datanode_heartbeat_recv_count", + "datanode heartbeat received", + &["result"] + ) + .unwrap(); } diff --git a/src/frontend/src/heartbeat.rs b/src/frontend/src/heartbeat.rs index 1bea71e87e3e..dea52d8f36c3 100644 --- a/src/frontend/src/heartbeat.rs +++ b/src/frontend/src/heartbeat.rs @@ -32,6 +32,7 @@ use tokio::time::{Duration, Instant}; use crate::error; use crate::error::Result; use crate::frontend::FrontendOptions; +use crate::metrics::{HEARTBEAT_RECV_COUNT, HEARTBEAT_SENT_COUNT}; pub mod handler; @@ -94,10 +95,14 @@ impl HeartbeatTask { let ctx = HeartbeatResponseHandlerContext::new(mailbox.clone(), resp); if let Err(e) = capture_self.handle_response(ctx).await { error!(e; "Error while handling heartbeat response"); + HEARTBEAT_RECV_COUNT.with_label_values(&["processing_error"]).inc(); + } else { + HEARTBEAT_RECV_COUNT.with_label_values(&["success"]).inc(); } } Ok(None) => break, Err(e) => { + HEARTBEAT_RECV_COUNT.with_label_values(&["error"]).inc(); error!(e; "Occur error while reading heartbeat response"); capture_self .start_with_retry(Duration::from_millis(retry_interval)) @@ -180,6 +185,7 @@ impl HeartbeatTask { error!(e; "Failed to send heartbeat to metasrv"); break; } else { + HEARTBEAT_SENT_COUNT.inc(); debug!("Send a heartbeat request to metasrv, content: {:?}", req); } } diff --git a/src/frontend/src/metrics.rs b/src/frontend/src/metrics.rs index db9d53ac19aa..33580c550e8b 100644 --- a/src/frontend/src/metrics.rs +++ b/src/frontend/src/metrics.rs @@ -51,4 +51,17 @@ lazy_static! { "frontend otlp traces rows" ) .unwrap(); + /// The number of heartbeats sent by frontend node. + pub static ref HEARTBEAT_SENT_COUNT: IntCounter = register_int_counter!( + "greptime_frontend_heartbeat_send_count", + "frontend heartbeat sent", + ) + .unwrap(); + /// The number of heartbeats received by frontend node, labeled with result type. 
+ pub static ref HEARTBEAT_RECV_COUNT: IntCounterVec = register_int_counter_vec!( + "greptime_frontend_heartbeat_recv_count", + "frontend heartbeat received", + &["result"] + ) + .unwrap(); } diff --git a/src/meta-srv/src/metrics.rs b/src/meta-srv/src/metrics.rs index ca35b290c279..07d0a4a12d11 100644 --- a/src/meta-srv/src/metrics.rs +++ b/src/meta-srv/src/metrics.rs @@ -45,4 +45,7 @@ lazy_static! { /// Meta kv cache miss counter. pub static ref METRIC_META_KV_CACHE_MISS: IntCounterVec = register_int_counter_vec!("greptime_meta_kv_cache_miss", "meta kv cache miss", &["op"]).unwrap(); + /// Heartbeat received by metasrv. + pub static ref METRIC_META_HEARTBEAT_RECV: IntCounterVec = + register_int_counter_vec!("greptime_meta_heartbeat_recv", "heartbeats received by metasrv", &["pusher_key"]).unwrap(); } diff --git a/src/meta-srv/src/service/heartbeat.rs b/src/meta-srv/src/service/heartbeat.rs index 358644c51042..f5ac74a4b580 100644 --- a/src/meta-srv/src/service/heartbeat.rs +++ b/src/meta-srv/src/service/heartbeat.rs @@ -32,6 +32,7 @@ use crate::error; use crate::error::Result; use crate::handler::{HeartbeatHandlerGroup, Pusher}; use crate::metasrv::{Context, Metasrv}; +use crate::metrics::METRIC_META_HEARTBEAT_RECV; use crate::service::{GrpcResult, GrpcStream}; #[async_trait::async_trait] @@ -65,7 +66,11 @@ impl heartbeat_server::Heartbeat for Metasrv { if pusher_key.is_none() { pusher_key = register_pusher(&handler_group, header, tx.clone()).await; } - + if let Some(k) = &pusher_key { + METRIC_META_HEARTBEAT_RECV.with_label_values(&[k]).inc(); + } else { + METRIC_META_HEARTBEAT_RECV.with_label_values(&["none"]).inc(); + } let res = handler_group .handle(req, ctx.clone()) .await