diff --git a/src/common/telemetry/src/lib.rs b/src/common/telemetry/src/lib.rs index 12f0098a158c..e63d6e8af4c8 100644 --- a/src/common/telemetry/src/lib.rs +++ b/src/common/telemetry/src/lib.rs @@ -21,7 +21,7 @@ mod panic_hook; pub mod tracing_context; mod tracing_sampler; -pub use logging::{init_default_ut_logging, init_global_logging}; +pub use logging::{init_default_ut_logging, init_global_logging, RELOAD_HANDLE}; pub use metric::dump_metrics; pub use panic_hook::set_panic_hook; -pub use {common_error, tracing}; +pub use {common_error, tracing, tracing_subscriber}; diff --git a/src/common/telemetry/src/logging.rs b/src/common/telemetry/src/logging.rs index 7047a8993c61..de018aa4b6f3 100644 --- a/src/common/telemetry/src/logging.rs +++ b/src/common/telemetry/src/logging.rs @@ -16,7 +16,7 @@ use std::env; use std::sync::{Arc, Mutex, Once}; -use once_cell::sync::Lazy; +use once_cell::sync::{Lazy, OnceCell}; use opentelemetry::{global, KeyValue}; use opentelemetry_otlp::WithExportConfig; use opentelemetry_sdk::propagation::TraceContextPropagator; @@ -26,6 +26,7 @@ use serde::{Deserialize, Serialize}; use tracing_appender::non_blocking::WorkerGuard; use tracing_appender::rolling::{RollingFileAppender, Rotation}; use tracing_log::LogTracer; +use tracing_subscriber::filter::Targets; use tracing_subscriber::fmt::Layer; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::prelude::*; @@ -35,6 +36,10 @@ use crate::tracing_sampler::{create_sampler, TracingSampleOptions}; pub const DEFAULT_OTLP_ENDPOINT: &str = "http://localhost:4317"; +// Handle for reloading log level +pub static RELOAD_HANDLE: OnceCell> = + OnceCell::new(); + /// The logging options that used to initialize the logger. #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(default)] @@ -242,6 +247,12 @@ pub fn init_global_logging( .parse::() .expect("error parsing log level string"); + let (dyn_filter, reload_handle) = tracing_subscriber::reload::Layer::new(filter.clone()); + + RELOAD_HANDLE + .set(reload_handle) + .expect("reload handle already set, maybe init_global_logging get called twice?"); + // Must enable 'tokio_unstable' cfg to use this feature. // For example: `RUSTFLAGS="--cfg tokio_unstable" cargo run -F common-telemetry/console -- standalone start` #[cfg(feature = "tokio-console")] @@ -263,7 +274,7 @@ pub fn init_global_logging( }; Registry::default() - .with(filter) + .with(dyn_filter) .with(tokio_console_layer) .with(stdout_logging_layer) .with(file_logging_layer) @@ -275,7 +286,7 @@ pub fn init_global_logging( #[cfg(not(feature = "tokio-console"))] let subscriber = Registry::default() - .with(filter) + .with(dyn_filter) .with(stdout_logging_layer) .with(file_logging_layer) .with(err_file_logging_layer); diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index 2b959f91f3c6..ad4ff5222561 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -74,6 +74,7 @@ use crate::query_handler::{ use crate::server::Server; pub mod authorize; +pub mod dyn_log; pub mod event; pub mod handler; pub mod header; @@ -708,6 +709,15 @@ impl HttpServer { authorize::check_http_auth, )), ) + .nest( + "/debug", + Router::new() + // handler for changing log level dynamically + .route( + "/log_level", + routing::get(dyn_log::dyn_log_handler).post(dyn_log::dyn_log_handler), + ), + ) // Handlers for debug, we don't expect a timeout. .nest( &format!("/{HTTP_API_VERSION}/prof"), diff --git a/src/servers/src/http/dyn_log.rs b/src/servers/src/http/dyn_log.rs new file mode 100644 index 000000000000..a34601aff9d0 --- /dev/null +++ b/src/servers/src/http/dyn_log.rs @@ -0,0 +1,54 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use axum::http::StatusCode; +use axum::response::IntoResponse; +use common_telemetry::tracing_subscriber::filter; +use common_telemetry::{info, RELOAD_HANDLE}; +use snafu::OptionExt; + +use crate::error::{InternalSnafu, InvalidParameterSnafu, Result}; + +#[axum_macros::debug_handler] +pub async fn dyn_log_handler(level: String) -> Result { + let new_filter = level.parse::().map_err(|e| { + InvalidParameterSnafu { + reason: format!("Invalid filter \"{level}\": {e:?}"), + } + .build() + })?; + let mut old_filter = None; + RELOAD_HANDLE + .get() + .context(InternalSnafu { + err_msg: "Reload handle not initialized", + })? + .modify(|filter| { + old_filter = Some(filter.clone()); + *filter = new_filter.clone() + }) + .map_err(|e| { + InternalSnafu { + err_msg: format!("Fail to modify filter: {e:?}"), + } + .build() + })?; + let change_note = format!( + "Log Level changed from {} to {}", + old_filter.map(|f| f.to_string()).unwrap_or_default(), + new_filter + ); + info!("{}", change_note.clone()); + Ok((StatusCode::OK, change_note)) +}