Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(api): Add more components to healthcheck – follow-ups #3337

Merged
merged 4 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 1 addition & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ members = [
# Test infrastructure
"core/tests/loadnext",
"core/tests/vm-benchmark",
"core/lib/bin_metadata",
]
resolver = "2"

Expand Down
1 change: 0 additions & 1 deletion core/bin/external_node/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ zksync_health_check.workspace = true
zksync_web3_decl.workspace = true
zksync_types.workspace = true
zksync_block_reverter.workspace = true
zksync_shared_metrics.workspace = true
zksync_node_genesis.workspace = true
zksync_node_fee_model.workspace = true
zksync_node_db_pruner.workspace = true
Expand Down
3 changes: 0 additions & 3 deletions core/bin/external_node/src/metrics/framework.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ use zksync_node_framework::{
implementations::resources::pools::{MasterPool, PoolResource},
FromContext, IntoContext, StopReceiver, Task, TaskId, WiringError, WiringLayer,
};
use zksync_shared_metrics::{GIT_METRICS, RUST_METRICS};
use zksync_types::{L1ChainId, L2ChainId, SLChainId};

use super::EN_METRICS;
Expand Down Expand Up @@ -39,8 +38,6 @@ impl WiringLayer for ExternalNodeMetricsLayer {
}

async fn wire(self, input: Self::Input) -> Result<Self::Output, WiringError> {
RUST_METRICS.initialize();
GIT_METRICS.initialize();
EN_METRICS.observe_config(
self.l1_chain_id,
self.sl_chain_id,
Expand Down
18 changes: 0 additions & 18 deletions core/lib/bin_metadata/Cargo.toml

This file was deleted.

8 changes: 4 additions & 4 deletions core/lib/dal/src/system_dal.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::{collections::HashMap, time::Duration};

use chrono::DateTime;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use zksync_db_connection::{connection::Connection, error::DalResult, instrument::InstrumentExt};

Expand All @@ -14,11 +14,11 @@ pub(crate) struct TableSize {
pub total_size: u64,
}

#[derive(Debug, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DatabaseMigration {
pub version: i64,
pub description: String,
pub installed_on: DateTime<chrono::Utc>,
pub installed_on: DateTime<Utc>,
pub success: bool,
pub checksum: String,
pub execution_time: Duration,
Expand Down Expand Up @@ -118,7 +118,7 @@ impl SystemDal<'_, '_> {
installed_on: row.installed_on,
success: row.success,
checksum: hex::encode(row.checksum),
execution_time: Duration::from_millis(u64::try_from(row.execution_time).unwrap_or(0)),
execution_time: Duration::from_nanos(u64::try_from(row.execution_time).unwrap_or(0)),
})
}
}
1 change: 0 additions & 1 deletion core/lib/health_check/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ serde_json.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["sync", "time"] }
tracing.workspace = true
zksync_bin_metadata.workspace = true

[dev-dependencies]
assert_matches.workspace = true
Expand Down
21 changes: 0 additions & 21 deletions core/lib/health_check/src/binary.rs

This file was deleted.

19 changes: 14 additions & 5 deletions core/lib/health_check/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,9 @@ pub use async_trait::async_trait;
use futures::future;
use serde::Serialize;
use tokio::sync::watch;
use zksync_bin_metadata::BIN_METADATA;

use self::metrics::{CheckResult, METRICS};
use crate::metrics::AppHealthCheckConfig;
use crate::metrics::{AppHealthCheckConfig, CheckResult, METRICS};

mod binary;
mod metrics;

#[cfg(test)]
Expand Down Expand Up @@ -114,6 +111,8 @@ pub struct AppHealthCheck {

#[derive(Debug, Clone)]
struct AppHealthCheckInner {
/// Application-level health details.
app_details: Option<serde_json::Value>,
components: Vec<Arc<dyn CheckHealth>>,
slow_time_limit: Duration,
hard_time_limit: Duration,
Expand All @@ -136,6 +135,7 @@ impl AppHealthCheck {

let inner = AppHealthCheckInner {
components: Vec::default(),
app_details: None,
slow_time_limit,
hard_time_limit,
};
Expand Down Expand Up @@ -181,6 +181,13 @@ impl AppHealthCheck {
}
}

/// Sets app-level health details. They can include build info etc.
pub fn set_details(&self, details: impl Serialize) {
let details = serde_json::to_value(details).expect("failed serializing app details");
let mut inner = self.inner.lock().expect("`AppHealthCheck` is poisoned");
inner.app_details = Some(details);
}

/// Inserts health check for a component.
///
/// # Errors
Expand Down Expand Up @@ -220,6 +227,7 @@ impl AppHealthCheck {
// Clone `inner` so that we don't hold a lock for them across a wait point.
let AppHealthCheckInner {
components,
app_details,
slow_time_limit,
hard_time_limit,
} = self
Expand All @@ -238,7 +246,8 @@ impl AppHealthCheck {
.map(|health| health.status)
.max_by_key(|status| status.priority_for_aggregation())
.unwrap_or(HealthStatus::Ready);
let inner = Health::with_details(aggregated_status.into(), BIN_METADATA);
let mut inner = Health::from(aggregated_status);
inner.details = app_details.clone();

let health = AppHealth { inner, components };
if !health.inner.status.is_healthy() {
Expand Down
1 change: 1 addition & 0 deletions core/lib/health_check/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ async fn aggregating_health_checks() {
let (first_check, first_updater) = ReactiveHealthCheck::new("first");
let (second_check, second_updater) = ReactiveHealthCheck::new("second");
let inner = AppHealthCheckInner {
app_details: None,
components: vec![Arc::new(first_check), Arc::new(second_check)],
slow_time_limit: AppHealthCheck::DEFAULT_SLOW_TIME_LIMIT,
hard_time_limit: AppHealthCheck::DEFAULT_HARD_TIME_LIMIT,
Expand Down
1 change: 0 additions & 1 deletion core/node/node_framework/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ zksync_vm_executor.workspace = true
zksync_state_keeper.workspace = true
zksync_consistency_checker.workspace = true
zksync_metadata_calculator.workspace = true
zksync_bin_metadata.workspace = true
zksync_node_sync.workspace = true
zksync_node_api_server.workspace = true
zksync_node_consensus.workspace = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ use std::sync::Arc;
use zksync_config::configs::api::HealthCheckConfig;
use zksync_health_check::AppHealthCheck;
use zksync_node_api_server::healthcheck::HealthCheckHandle;
use zksync_shared_metrics::metadata::{GitMetadata, RustMetadata, GIT_METRICS, RUST_METRICS};
use zksync_web3_decl::jsonrpsee::core::Serialize;

use crate::{
implementations::resources::healthcheck::AppHealthCheckResource,
Expand All @@ -12,6 +14,13 @@ use crate::{
FromContext, IntoContext,
};

/// Full metadata of the compiled binary.
#[derive(Debug, Serialize)]
pub struct BinMetadata {
pub rust: &'static RustMetadata,
pub git: &'static GitMetadata,
}

/// Wiring layer for health check server
///
/// Expects other layers to insert different components' health checks
Expand Down Expand Up @@ -73,8 +82,12 @@ impl Task for HealthCheckTask {
}

async fn run(mut self: Box<Self>, mut stop_receiver: StopReceiver) -> anyhow::Result<()> {
self.app_health_check.set_details(BinMetadata {
rust: RUST_METRICS.initialize(),
git: GIT_METRICS.initialize(),
});
let handle =
HealthCheckHandle::spawn_server(self.config.bind_addr(), self.app_health_check.clone());
HealthCheckHandle::spawn_server(self.config.bind_addr(), self.app_health_check);
stop_receiver.0.changed().await?;
handle.stop().await;

Expand Down
Loading
Loading