From dc018001d8ed1759128362f6f8e9704edcfdbaee Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Mon, 13 May 2024 17:54:38 +0900 Subject: [PATCH] Added latency metrics for REST endpoint. (#4977) * Added latency metrics for REST endpoint. This PR also makes it possible to set specific buckets for histograms. Closes #4932 * CR comments --- quickwit/quickwit-cli/src/metrics.rs | 1 + quickwit/quickwit-common/src/metrics.rs | 16 ++++++++++------ quickwit/quickwit-common/src/tower/metrics.rs | 2 ++ .../quickwit-ingest/src/ingest_v2/metrics.rs | 5 +++-- quickwit/quickwit-jaeger/src/metrics.rs | 5 ++++- .../quickwit-opentelemetry/src/otlp/metrics.rs | 5 ++++- quickwit/quickwit-search/src/metrics.rs | 5 ++++- quickwit/quickwit-serve/src/metrics.rs | 11 ++++++++++- quickwit/quickwit-serve/src/rest.rs | 12 ++++++++++-- 9 files changed, 48 insertions(+), 14 deletions(-) diff --git a/quickwit/quickwit-cli/src/metrics.rs b/quickwit/quickwit-cli/src/metrics.rs index 5fbd5b6e647..154f2226b3e 100644 --- a/quickwit/quickwit-cli/src/metrics.rs +++ b/quickwit/quickwit-cli/src/metrics.rs @@ -33,6 +33,7 @@ impl Default for CliMetrics { "cli", &[], [], + quickwit_common::metrics::exponential_buckets(5.0, 5.0, 5).unwrap(), ), } } diff --git a/quickwit/quickwit-common/src/metrics.rs b/quickwit/quickwit-common/src/metrics.rs index 96b42186f17..1e8c98f9157 100644 --- a/quickwit/quickwit-common/src/metrics.rs +++ b/quickwit/quickwit-common/src/metrics.rs @@ -21,11 +21,12 @@ use std::collections::{BTreeMap, HashMap}; use std::sync::OnceLock; use once_cell::sync::Lazy; -use prometheus::{Encoder, HistogramOpts, Opts, TextEncoder}; pub use prometheus::{ - Histogram, HistogramTimer, HistogramVec as PrometheusHistogramVec, IntCounter, - IntCounterVec as PrometheusIntCounterVec, IntGauge, IntGaugeVec as PrometheusIntGaugeVec, + exponential_buckets, Histogram, HistogramTimer, HistogramVec as PrometheusHistogramVec, + IntCounter, IntCounterVec as PrometheusIntCounterVec, IntGauge, + IntGaugeVec as 
PrometheusIntGaugeVec, }; +use prometheus::{Encoder, HistogramOpts, Opts, TextEncoder}; #[derive(Clone)] pub struct HistogramVec { @@ -146,10 +147,11 @@ pub fn new_gauge_vec( IntGaugeVec { underlying } } -pub fn new_histogram(name: &str, help: &str, subsystem: &str) -> Histogram { +pub fn new_histogram(name: &str, help: &str, subsystem: &str, buckets: Vec) -> Histogram { let histogram_opts = HistogramOpts::new(name, help) .namespace("quickwit") - .subsystem(subsystem); + .subsystem(subsystem) + .buckets(buckets); let histogram = Histogram::with_opts(histogram_opts).expect("failed to create histogram"); prometheus::register(Box::new(histogram.clone())).expect("failed to register histogram"); histogram @@ -161,6 +163,7 @@ pub fn new_histogram_vec( subsystem: &str, const_labels: &[(&str, &str)], label_names: [&str; N], + buckets: Vec, ) -> HistogramVec { let owned_const_labels: HashMap = const_labels .iter() @@ -169,7 +172,8 @@ pub fn new_histogram_vec( let histogram_opts = HistogramOpts::new(name, help) .namespace("quickwit") .subsystem(subsystem) - .const_labels(owned_const_labels); + .const_labels(owned_const_labels) + .buckets(buckets); let underlying = PrometheusHistogramVec::new(histogram_opts, &label_names) .expect("failed to create histogram vec"); diff --git a/quickwit/quickwit-common/src/tower/metrics.rs b/quickwit/quickwit-common/src/tower/metrics.rs index e197466894e..2ec2b73f9bd 100644 --- a/quickwit/quickwit-common/src/tower/metrics.rs +++ b/quickwit/quickwit-common/src/tower/metrics.rs @@ -23,6 +23,7 @@ use std::time::Instant; use futures::{ready, Future}; use pin_project::{pin_project, pinned_drop}; +use prometheus::exponential_buckets; use tower::{Layer, Service}; use crate::metrics::{ @@ -103,6 +104,7 @@ impl GrpcMetricsLayer { subsystem, &[("kind", kind)], ["rpc", "status"], + exponential_buckets(0.001, 2.0, 12).unwrap(), ), } } diff --git a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs index 
5bdeb302047..c140f2e9d4b 100644 --- a/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs +++ b/quickwit/quickwit-ingest/src/ingest_v2/metrics.rs @@ -20,8 +20,8 @@ use mrecordlog::ResourceUsage; use once_cell::sync::Lazy; use quickwit_common::metrics::{ - new_counter_vec, new_gauge, new_gauge_vec, new_histogram_vec, HistogramVec, IntCounterVec, - IntGauge, IntGaugeVec, + exponential_buckets, new_counter_vec, new_gauge, new_gauge_vec, new_histogram_vec, + HistogramVec, IntCounterVec, IntGauge, IntGaugeVec, }; pub(super) struct IngestV2Metrics { @@ -69,6 +69,7 @@ impl Default for IngestV2Metrics { "ingest", &[], ["operation", "type"], + exponential_buckets(0.001, 2.0, 12).unwrap(), ), wal_disk_used_bytes: new_gauge( "wal_disk_used_bytes", diff --git a/quickwit/quickwit-jaeger/src/metrics.rs b/quickwit/quickwit-jaeger/src/metrics.rs index 3c8dde0579e..c776c849043 100644 --- a/quickwit/quickwit-jaeger/src/metrics.rs +++ b/quickwit/quickwit-jaeger/src/metrics.rs @@ -18,7 +18,9 @@ // along with this program. If not, see . use once_cell::sync::Lazy; -use quickwit_common::metrics::{new_counter_vec, new_histogram_vec, HistogramVec, IntCounterVec}; +use quickwit_common::metrics::{ + exponential_buckets, new_counter_vec, new_histogram_vec, HistogramVec, IntCounterVec, +}; pub struct JaegerServiceMetrics { pub requests_total: IntCounterVec<2>, @@ -52,6 +54,7 @@ impl Default for JaegerServiceMetrics { "jaeger", &[], ["operation", "index", "error"], + exponential_buckets(0.02, 2.0, 8).unwrap(), ), fetched_traces_total: new_counter_vec( "fetched_traces_total", diff --git a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs index fa39789300e..c94929d5985 100644 --- a/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs +++ b/quickwit/quickwit-opentelemetry/src/otlp/metrics.rs @@ -18,7 +18,9 @@ // along with this program. If not, see . 
use once_cell::sync::Lazy; -use quickwit_common::metrics::{new_counter_vec, new_histogram_vec, HistogramVec, IntCounterVec}; +use quickwit_common::metrics::{ + exponential_buckets, new_counter_vec, new_histogram_vec, HistogramVec, IntCounterVec, +}; pub struct OtlpServiceMetrics { pub requests_total: IntCounterVec<4>, @@ -52,6 +54,7 @@ impl Default for OtlpServiceMetrics { "otlp", &[], ["service", "index", "transport", "format", "error"], + exponential_buckets(0.02, 2.0, 8).unwrap(), ), ingested_log_records_total: new_counter_vec( "ingested_log_records_total", diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 47e06129d61..4031230f085 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -20,7 +20,9 @@ // See https://prometheus.io/docs/practices/naming/ use once_cell::sync::Lazy; -use quickwit_common::metrics::{new_counter, new_histogram, Histogram, IntCounter}; +use quickwit_common::metrics::{ + exponential_buckets, new_counter, new_histogram, Histogram, IntCounter, +}; pub struct SearchMetrics { pub leaf_searches_splits_total: IntCounter, @@ -40,6 +42,7 @@ impl Default for SearchMetrics { "Number of seconds required to run a leaf search over a single split. The timer \ starts after the semaphore is obtained.", "search", + exponential_buckets(0.005, 2.0, 10).unwrap(), ), } } diff --git a/quickwit/quickwit-serve/src/metrics.rs b/quickwit/quickwit-serve/src/metrics.rs index 85669ed9e8d..92fe1796cdf 100644 --- a/quickwit/quickwit-serve/src/metrics.rs +++ b/quickwit/quickwit-serve/src/metrics.rs @@ -18,10 +18,11 @@ // along with this program. If not, see . 
use once_cell::sync::Lazy; -use quickwit_common::metrics::{new_counter_vec, IntCounterVec}; +use quickwit_common::metrics::{new_counter_vec, new_histogram_vec, HistogramVec, IntCounterVec}; pub struct RestMetrics { pub http_requests_total: IntCounterVec<2>, + pub request_duration_secs: HistogramVec<2>, } impl Default for RestMetrics { @@ -34,6 +35,14 @@ impl Default for RestMetrics { &[], ["method", "status_code"], ), + request_duration_secs: new_histogram_vec( + "request_duration_secs", + "Response time in seconds", + "", + &[], + ["method", "status_code"], + quickwit_common::metrics::exponential_buckets(0.02, 2.0, 8).unwrap(), + ), } } } diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index e810863c930..6b61cba495c 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -29,6 +29,7 @@ use tower_http::compression::predicate::{DefaultPredicate, Predicate, SizeAbove} use tower_http::compression::CompressionLayer; use tower_http::cors::CorsLayer; use tracing::{error, info}; +use warp::filters::log::Info; use warp::{redirect, Filter, Rejection, Reply}; use crate::cluster_api::cluster_handler; @@ -71,10 +72,17 @@ pub(crate) async fn start_rest_server( readiness_trigger: BoxFutureInfaillible<()>, shutdown_signal: BoxFutureInfaillible<()>, ) -> anyhow::Result<()> { - let request_counter = warp::log::custom(|info| { + let request_counter = warp::log::custom(|info: Info| { + let elapsed = info.elapsed(); + let status = info.status(); + let label_values: [&str; 2] = [info.method().as_str(), status.as_str()]; + crate::SERVE_METRICS + .request_duration_secs + .with_label_values(label_values) + .observe(elapsed.as_secs_f64()); crate::SERVE_METRICS .http_requests_total - .with_label_values([info.method().as_str(), info.status().as_str()]) + .with_label_values(label_values) .inc(); }); // Docs routes