diff --git a/Cargo.lock b/Cargo.lock index c2356a4c2e4d..eee5d4ee5d7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2214,6 +2214,18 @@ dependencies = [ name = "common-plugins" version = "0.9.3" +[[package]] +name = "common-pprof" +version = "0.9.3" +dependencies = [ + "common-error", + "common-macro", + "pprof", + "prost 0.12.6", + "snafu 0.8.5", + "tokio", +] + [[package]] name = "common-procedure" version = "0.9.3" @@ -10705,6 +10717,7 @@ dependencies = [ "common-mem-prof", "common-meta", "common-plugins", + "common-pprof", "common-query", "common-recordbatch", "common-runtime", diff --git a/Cargo.toml b/Cargo.toml index 63d7ad3ba739..72ad968ca758 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ members = [ "src/common/mem-prof", "src/common/meta", "src/common/plugins", + "src/common/pprof", "src/common/procedure", "src/common/procedure-test", "src/common/query", @@ -208,6 +209,7 @@ common-macro = { path = "src/common/macro" } common-mem-prof = { path = "src/common/mem-prof" } common-meta = { path = "src/common/meta" } common-plugins = { path = "src/common/plugins" } +common-pprof = { path = "src/common/pprof" } common-procedure = { path = "src/common/procedure" } common-procedure-test = { path = "src/common/procedure-test" } common-query = { path = "src/common/query" } diff --git a/docs/how-to/how-to-profile-cpu.md b/docs/how-to/how-to-profile-cpu.md index b73c85ea2f74..b1c5ded09ee1 100644 --- a/docs/how-to/how-to-profile-cpu.md +++ b/docs/how-to/how-to-profile-cpu.md @@ -1,11 +1,5 @@ # Profiling CPU -## Build GreptimeDB with `pprof` feature - -```bash -cargo build --features=pprof -``` - ## HTTP API Sample at 99 Hertz, for 5 seconds, output report in [protobuf format](https://github.com/google/pprof/blob/master/proto/profile.proto). 
```bash diff --git a/docs/how-to/how-to-profile-memory.md b/docs/how-to/how-to-profile-memory.md index 7211683190a7..a0fe42df55ce 100644 --- a/docs/how-to/how-to-profile-memory.md +++ b/docs/how-to/how-to-profile-memory.md @@ -18,12 +18,6 @@ sudo apt install libjemalloc-dev curl https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl > ./flamegraph.pl ``` -### Build GreptimeDB with `mem-prof` feature. - -```bash -cargo build --features=mem-prof -``` - ## Profiling Start GreptimeDB instance with environment variables: diff --git a/src/cmd/Cargo.toml b/src/cmd/Cargo.toml index b57d2211875b..501f40a04715 100644 --- a/src/cmd/Cargo.toml +++ b/src/cmd/Cargo.toml @@ -10,7 +10,7 @@ name = "greptime" path = "src/bin/greptime.rs" [features] -default = ["python"] +default = ["python", "servers/pprof", "servers/mem-prof"] tokio-console = ["common-telemetry/tokio-console"] python = ["frontend/python"] diff --git a/src/common/pprof/Cargo.toml b/src/common/pprof/Cargo.toml new file mode 100644 index 000000000000..1657244d21f1 --- /dev/null +++ b/src/common/pprof/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "common-pprof" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +common-error.workspace = true +common-macro.workspace = true +prost.workspace = true +snafu.workspace = true +tokio.workspace = true + +[target.'cfg(unix)'.dependencies] +pprof = { version = "0.13", features = [ + "flamegraph", + "prost-codec", + "protobuf", +] } + +[lints] +workspace = true diff --git a/src/common/pprof/src/lib.rs b/src/common/pprof/src/lib.rs new file mode 100644 index 000000000000..71bcda6559b4 --- /dev/null +++ b/src/common/pprof/src/lib.rs @@ -0,0 +1,99 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[cfg(unix)] +pub mod nix; + +pub mod error { + use std::any::Any; + + use common_error::ext::ErrorExt; + use common_error::status_code::StatusCode; + use common_macro::stack_trace_debug; + use snafu::{Location, Snafu}; + + #[derive(Snafu)] + #[stack_trace_debug] + #[snafu(visibility(pub(crate)))] + pub enum Error { + #[cfg(unix)] + #[snafu(display("Pprof error"))] + Pprof { + #[snafu(source)] + error: pprof::Error, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Pprof is unsupported on this platform"))] + Unsupported { + #[snafu(implicit)] + location: Location, + }, + } + + pub type Result = std::result::Result; + + impl ErrorExt for Error { + fn status_code(&self) -> StatusCode { + match self { + #[cfg(unix)] + Error::Pprof { .. } => StatusCode::Unexpected, + Error::Unsupported { .. } => StatusCode::Unsupported, + } + } + + fn as_any(&self) -> &dyn Any { + self + } + } +} + +#[cfg(not(unix))] +pub mod dummy { + use std::time::Duration; + + use crate::error::{Result, UnsupportedSnafu}; + + /// Dummy CPU profiler utility. + #[derive(Debug)] + pub struct Profiling {} + + impl Profiling { + /// Creates a new profiler. + pub fn new(_duration: Duration, _frequency: i32) -> Profiling { + Profiling {} + } + + /// Profiles and returns a generated text. + pub async fn dump_text(&self) -> Result { + UnsupportedSnafu {}.fail() + } + + /// Profiles and returns a generated flamegraph. + pub async fn dump_flamegraph(&self) -> Result> { + UnsupportedSnafu {}.fail() + } + + /// Profiles and returns a generated proto. 
+ pub async fn dump_proto(&self) -> Result> { + UnsupportedSnafu {}.fail() + } + } +} + +#[cfg(not(unix))] +pub use dummy::Profiling; +#[cfg(unix)] +pub use nix::Profiling; diff --git a/src/common/pprof/src/nix.rs b/src/common/pprof/src/nix.rs new file mode 100644 index 000000000000..bd76f8fb3118 --- /dev/null +++ b/src/common/pprof/src/nix.rs @@ -0,0 +1,78 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::time::Duration; + +use pprof::protos::Message; +use snafu::ResultExt; + +use crate::error::{PprofSnafu, Result}; + +/// CPU profiler utility. +// Inspired by https://github.com/datafuselabs/databend/blob/67f445e83cd4eceda98f6c1c114858929d564029/src/common/base/src/base/profiling.rs +#[derive(Debug)] +pub struct Profiling { + /// Sample duration. + duration: Duration, + /// Sample frequency. + frequency: i32, +} + +impl Profiling { + /// Creates a new profiler. + pub fn new(duration: Duration, frequency: i32) -> Profiling { + Profiling { + duration, + frequency, + } + } + + /// Profiles and returns a generated pprof report. + pub async fn report(&self) -> Result { + let guard = pprof::ProfilerGuardBuilder::default() + .frequency(self.frequency) + .blocklist(&["libc", "libgcc", "pthread", "vdso"]) + .build() + .context(PprofSnafu)?; + tokio::time::sleep(self.duration).await; + guard.report().build().context(PprofSnafu) + } + + /// Profiles and returns a generated text. 
+ pub async fn dump_text(&self) -> Result { + let report = self.report().await?; + let text = format!("{report:?}"); + Ok(text) + } + + /// Profiles and returns a generated flamegraph. + pub async fn dump_flamegraph(&self) -> Result> { + let mut body: Vec = Vec::new(); + + let report = self.report().await?; + report.flamegraph(&mut body).context(PprofSnafu)?; + + Ok(body) + } + + /// Profiles and returns a generated proto. + pub async fn dump_proto(&self) -> Result> { + let report = self.report().await?; + // Generate google’s pprof format report. + let profile = report.pprof().context(PprofSnafu)?; + let body = profile.encode_to_vec(); + + Ok(body) + } +} diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index 725ff497a4d8..df02a4485512 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -7,7 +7,7 @@ license.workspace = true [features] dashboard = [] mem-prof = ["dep:common-mem-prof"] -pprof = ["dep:pprof"] +pprof = ["dep:common-pprof"] testing = [] [lints] @@ -37,6 +37,7 @@ common-macro.workspace = true common-mem-prof = { workspace = true, optional = true } common-meta.workspace = true common-plugins.workspace = true +common-pprof = { workspace = true, optional = true } common-query.workspace = true common-recordbatch.workspace = true common-runtime.workspace = true @@ -75,11 +76,6 @@ pgwire = { version = "0.25.0", default-features = false, features = ["server-api pin-project = "1.0" pipeline.workspace = true postgres-types = { version = "0.2", features = ["with-chrono-0_4", "with-serde_json-1"] } -pprof = { version = "0.13", features = [ - "flamegraph", - "prost-codec", - "protobuf", -], optional = true } prometheus.workspace = true promql-parser.workspace = true prost.workspace = true @@ -136,7 +132,7 @@ tokio-postgres = "0.7" tokio-postgres-rustls = "0.12" tokio-test = "0.4" -[target.'cfg(not(windows))'.dev-dependencies] +[target.'cfg(unix)'.dev-dependencies] pprof = { version = "0.13", features = ["criterion", "flamegraph"] } 
[target.'cfg(windows)'.dependencies] diff --git a/src/servers/src/error.rs b/src/servers/src/error.rs index 0fde3b527c84..a796b895213c 100644 --- a/src/servers/src/error.rs +++ b/src/servers/src/error.rs @@ -424,9 +424,7 @@ pub enum Error { #[cfg(feature = "pprof")] #[snafu(display("Failed to dump pprof data"))] - DumpPprof { - source: crate::http::pprof::nix::Error, - }, + DumpPprof { source: common_pprof::error::Error }, #[cfg(not(windows))] #[snafu(display("Failed to update jemalloc metrics"))] diff --git a/src/servers/src/http/pprof.rs b/src/servers/src/http/pprof.rs index 12479444db58..c00d160aaf6a 100644 --- a/src/servers/src/http/pprof.rs +++ b/src/servers/src/http/pprof.rs @@ -12,9 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#[cfg(feature = "pprof")] -pub(crate) mod nix; - #[cfg(feature = "pprof")] pub mod handler { use std::num::NonZeroI32; @@ -23,13 +20,13 @@ pub mod handler { use axum::extract::Query; use axum::http::StatusCode; use axum::response::IntoResponse; + use common_pprof::Profiling; use common_telemetry::info; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use snafu::ResultExt; use crate::error::{DumpPprofSnafu, Result}; - use crate::http::pprof::nix::Profiling; /// Output format. #[derive(Debug, Serialize, Deserialize, JsonSchema)] @@ -70,8 +67,8 @@ pub mod handler { let body = match req.output { Output::Proto => profiling.dump_proto().await.context(DumpPprofSnafu)?, Output::Text => { - let report = profiling.report().await.context(DumpPprofSnafu)?; - format!("{:?}", report).into_bytes() + let report = profiling.dump_text().await.context(DumpPprofSnafu)?; + report.into_bytes() } Output::Flamegraph => profiling.dump_flamegraph().await.context(DumpPprofSnafu)?, };