diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 0d58253ef34..2d0b51d90b3 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -1116,6 +1116,12 @@ version = "0.6.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" +[[package]] +name = "bytemuck" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78834c15cb5d5efe3452d58b1e8ba890dd62d21907f867f383358198e56ebca5" + [[package]] name = "byteorder" version = "1.5.0" @@ -1556,6 +1562,15 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "cpp_demangle" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8227005286ec39567949b33df9896bcadfa6051bccca2488129f108ca23119" +dependencies = [ + "cfg-if", +] + [[package]] name = "cpufeatures" version = "0.2.12" @@ -1881,6 +1896,15 @@ version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f578e8e2c440e7297e008bb5486a3a8a194775224bbc23729b0dbdfaeebf162e" +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid", +] + [[package]] name = "der" version = "0.6.1" @@ -2345,6 +2369,18 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "findshlibs" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" +dependencies = [ + "cc", + "lazy_static", + "libc", + "winapi 0.3.9", +] + [[package]] name = "finl_unicode" version = "1.2.0" @@ -3220,6 +3256,24 @@ version = "0.2.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" +[[package]] +name = "inferno" +version = "0.11.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "321f0f839cd44a4686e9504b0a62b4d69a50b62072144c71c68f5873c167b8d9" +dependencies = [ + "ahash 0.8.11", + "indexmap 2.1.0", + "is-terminal", + "itoa", + "log", + "num-format", + "once_cell", + "quick-xml 0.26.0", + "rgb", + "str_stack", +] + [[package]] name = "inherent" version = "1.0.11" @@ -4146,6 +4200,17 @@ dependencies = [ "regex", ] +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "no-std-net" version = "0.6.0" @@ -4212,6 +4277,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec 0.7.4", + "itoa", +] + [[package]] name = "num-integer" version = "0.1.46" @@ -5192,6 +5267,27 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "pprof" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef5c97c51bd34c7e742402e216abdeb44d415fbe6ae41d56b114723e953711cb" +dependencies = [ + "backtrace", + "cfg-if", + "findshlibs", + "inferno", + "libc", + "log", + "nix", + "once_cell", + "parking_lot", + "smallvec", + "symbolic-demangle", + "tempfile", + "thiserror", +] + [[package]] name 
= "ppv-lite86" version = "0.2.17" @@ -5528,6 +5624,15 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" +[[package]] +name = "quick-xml" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" +dependencies = [ + "memchr", +] + [[package]] name = "quick-xml" version = "0.29.0" @@ -6345,6 +6450,7 @@ dependencies = [ "once_cell", "opentelemetry", "percent-encoding", + "pprof", "prost", "prost-types", "quickwit-actors", @@ -6787,6 +6893,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rgb" +version = "0.8.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05aaa8004b64fd573fc9d002f4e632d51ad4f026c2b5ba95fcb6c2f32c2c47d8" +dependencies = [ + "bytemuck", +] + [[package]] name = "ring" version = "0.16.20" @@ -7888,6 +8003,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "str_stack" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" + [[package]] name = "string_cache" version = "0.8.7" @@ -7939,6 +8060,29 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" +[[package]] +name = "symbolic-common" +version = "12.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89d2aef0f60f62e38c472334148758afbd570ed78d20be622692e5ebfec3734f" +dependencies = [ + "debugid", + "memmap2", + "stable_deref_trait", + "uuid", +] + +[[package]] +name = "symbolic-demangle" +version = "12.9.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1719d1292eac816cdd3fdad12b22315624b7ce6a7bacb267a3a27fccfd286b48" +dependencies = [ + "cpp_demangle", + "rustc-demangle", + "symbolic-common", +] + [[package]] name = "syn" version = "1.0.109" diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 0f1778a2309..84919ccb453 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -171,6 +171,7 @@ postcard = { version = "1.0.4", features = [ ], default-features = false } predicates = "3" prettyplease = "0.2.0" +pprof = { version = "0.13", features = ["flamegraph"] } proc-macro2 = "1.0.50" prometheus = { version = "0.13", features = ["process"] } proptest = "1" diff --git a/quickwit/quickwit-cli/Cargo.toml b/quickwit/quickwit-cli/Cargo.toml index d5c2b3f6acc..aeb14866f85 100644 --- a/quickwit/quickwit-cli/Cargo.toml +++ b/quickwit/quickwit-cli/Cargo.toml @@ -81,6 +81,7 @@ quickwit-storage = { workspace = true, features = ["testsuite"] } [features] jemalloc = ["dep:tikv-jemalloc-ctl", "dep:tikv-jemallocator"] ci-test = [] +pprof = ["quickwit-serve/pprof"] openssl-support = ["openssl-probe"] # Requires to enable tokio unstable via RUSTFLAGS="--cfg tokio_unstable" tokio-console = ["console-subscriber", "quickwit-common/named_tasks"] diff --git a/quickwit/quickwit-serve/Cargo.toml b/quickwit/quickwit-serve/Cargo.toml index dafb56ca6ae..22db2523d05 100644 --- a/quickwit/quickwit-serve/Cargo.toml +++ b/quickwit/quickwit-serve/Cargo.toml @@ -30,6 +30,7 @@ mime_guess = { workspace = true } once_cell = { workspace = true } opentelemetry = { workspace = true } percent-encoding = { workspace = true } +pprof = { workspace = true, optional = true } prost = { workspace = true } prost-types = { workspace = true } regex = { workspace = true } @@ -91,3 +92,9 @@ quickwit-opentelemetry = { workspace = true, features = ["testsuite"] } quickwit-proto = { workspace = true, features = ["testsuite"] } quickwit-search = { workspace = true, features = ["testsuite"] 
} quickwit-storage = { workspace = true, features = ["testsuite"] } + +[features] +pprof = [ + "dep:pprof" +] + diff --git a/quickwit/quickwit-serve/src/lib.rs b/quickwit/quickwit-serve/src/lib.rs index 22c6730fc43..b56d89a0da2 100644 --- a/quickwit/quickwit-serve/src/lib.rs +++ b/quickwit/quickwit-serve/src/lib.rs @@ -38,6 +38,7 @@ mod metrics_api; mod node_info_handler; mod openapi; mod otlp_api; +mod pprof; mod rate_modulator; mod rest; mod rest_api_response; diff --git a/quickwit/quickwit-serve/src/pprof.rs b/quickwit/quickwit-serve/src/pprof.rs new file mode 100644 index 00000000000..231aa39b26d --- /dev/null +++ b/quickwit/quickwit-serve/src/pprof.rs @@ -0,0 +1,106 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. +// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+use warp::Filter;
+
+/// Builds the `pprof/start` and `pprof/stop` routes for a binary compiled without the `pprof`
+/// feature.
+///
+/// Both routes reply with `400 Bad Request` and a fixed message saying that the feature was not
+/// compiled in.
+#[cfg(not(feature = "pprof"))]
+pub fn pprof_routes() -> impl Filter<Extract = impl warp::Reply, Error = warp::Rejection> + Clone {
+    /// Reply shared by both routes.
+    fn not_compiled() -> warp::reply::WithStatus<&'static str> {
+        warp::reply::with_status(
+            "not compiled with pprof feature",
+            warp::http::StatusCode::BAD_REQUEST,
+        )
+    }
+
+    let start_profiler = warp::path!("pprof" / "start").map(not_compiled);
+    let stop_profiler = warp::path!("pprof" / "stop").map(not_compiled);
+
+    start_profiler.or(stop_profiler)
+}
+
+/// Builds the CPU profiling routes.
+///
+/// - `pprof/start` starts a CPU profiler. It replies `200` on success, `400` if a profiler
+///   started through this route is still running, and `500` if the profiler cannot be created.
+/// - `pprof/stop` stops the running profiler and replies with a flamegraph as `image/svg+xml`.
+///   It replies `400` if no profiler is running and `500` if the flamegraph cannot be produced.
+///
+/// The profiler state lives in a mutex shared by the two routes returned from one call.
+#[cfg(feature = "pprof")]
+pub fn pprof_routes() -> impl Filter<Extract = impl warp::Reply, Error = warp::Rejection> + Clone {
+    use std::sync::{Arc, Mutex, MutexGuard, PoisonError};
+
+    use pprof::ProfilerGuard;
+    use warp::http::StatusCode;
+    use warp::reply::{Reply, Response};
+
+    /// Sampling frequency passed to `ProfilerGuard::new`.
+    // NOTE(review): the pprof documentation describes this value as Hz -- confirm.
+    const SAMPLING_FREQUENCY: i32 = 100;
+
+    /// Slot holding the running profiler, or `None` when no profiling is in progress.
+    type SharedProfiler = Arc<Mutex<Option<ProfilerGuard<'static>>>>;
+
+    /// Locks the slot, recovering the inner value if the mutex was poisoned.
+    ///
+    /// The slot only ever holds `None` or `Some(profiler)`, so the value left behind by a
+    /// panicking handler is still usable; recovering keeps the endpoints from panicking forever.
+    fn lock_profiler(shared: &SharedProfiler) -> MutexGuard<'_, Option<ProfilerGuard<'static>>> {
+        shared.lock().unwrap_or_else(PoisonError::into_inner)
+    }
+
+    /// Builds a text reply with the given status code.
+    fn text_reply(message: impl Into<String>, status: StatusCode) -> Response {
+        warp::reply::with_status(message.into(), status).into_response()
+    }
+
+    /// Stops the running profiler, if any, and renders its samples as an SVG flamegraph.
+    fn get_flamegraph(shared: &SharedProfiler) -> Response {
+        // Take the profiler out in its own statement so that the lock is released before the
+        // report is built.
+        let running_profiler = lock_profiler(shared).take();
+        let profiler = match running_profiler {
+            Some(profiler) => profiler,
+            None => return text_reply("CPU profiling is not running", StatusCode::BAD_REQUEST),
+        };
+        // NOTE(review): building the report runs inline in the request handler -- confirm this
+        // is acceptable for the runtime, or move it to a blocking task.
+        let flamegraph = profiler.report().build().ok().and_then(|report| {
+            let mut buffer = Vec::new();
+            report.flamegraph(&mut buffer).ok().map(|()| buffer)
+        });
+        match flamegraph {
+            Some(svg) => {
+                warp::reply::with_header(svg, "Content-Type", "image/svg+xml").into_response()
+            }
+            None => text_reply(
+                "Failed to generate flamegraph",
+                StatusCode::INTERNAL_SERVER_ERROR,
+            ),
+        }
+    }
+
+    let profiler_guard: SharedProfiler = Arc::new(Mutex::new(None));
+
+    let start_profiler = {
+        let shared = Arc::clone(&profiler_guard);
+        warp::path!("pprof" / "start").map(move || {
+            let mut slot = lock_profiler(&shared);
+            if slot.is_some() {
+                return text_reply("CPU profiling is already running", StatusCode::BAD_REQUEST);
+            }
+            // Creating the profiler can fail; report the failure instead of panicking inside
+            // the request handler.
+            match ProfilerGuard::new(SAMPLING_FREQUENCY) {
+                Ok(profiler) => {
+                    *slot = Some(profiler);
+                    text_reply("CPU profiling started", StatusCode::OK)
+                }
+                Err(error) => text_reply(
+                    format!("Failed to start CPU profiling: {error}"),
+                    StatusCode::INTERNAL_SERVER_ERROR,
+                ),
+            }
+        })
+    };
+
+    let stop_profiler = {
+        let shared = Arc::clone(&profiler_guard);
+        warp::path!("pprof" / "stop").map(move || get_flamegraph(&shared))
+    };
+
+    start_profiler.or(stop_profiler)
+}
diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index c3e90550865..b97d0fee031 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -46,6 +46,7 @@ use crate::jaeger_api::jaeger_api_handlers; use crate::metrics_api::metrics_handler; use crate::node_info_handler::node_info_handler; use crate::otlp_api::otlp_ingest_api_handlers; +use crate::pprof::pprof_routes; use crate::rest_api_response::{RestApiError, RestApiResponse}; use crate::search_api::{search_get_handler, search_post_handler, search_stream_handler}; use crate::template_api::index_template_api_handlers; @@ -158,6 +159,9 @@ pub(crate) async fn start_rest_server( quickwit_services.env_filter_reload_fn.clone(), ); + // `/pprof` route. + let cpu_pprof_routes = pprof_routes(); + // `/api/v1/*` routes. let api_v1_root_route = api_v1_routes(quickwit_services.clone()); @@ -181,6 +185,7 @@ pub(crate) async fn start_rest_server( .or(health_check_routes) .or(metrics_routes) .or(developer_routes) + .or(cpu_pprof_routes) .with(request_counter) .recover(recover_fn) .with(extra_headers)