From f1fd431033f9c723f5ad409f6654d4948e45d0f8 Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Fri, 17 May 2024 15:39:59 +0200 Subject: [PATCH 1/3] add metric for search split affinity --- quickwit/quickwit-search/src/metrics.rs | 14 ++++++++++++++ quickwit/quickwit-search/src/search_job_placer.rs | 14 +++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 718d7b307b1..9928756982b 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -27,6 +27,8 @@ use quickwit_common::metrics::{ pub struct SearchMetrics { pub leaf_searches_splits_total: IntCounter, pub leaf_search_split_duration_secs: Histogram, + pub job_assigned_total: IntCounter, + pub job_assigned_to_affinity_searcher: IntCounter, } impl Default for SearchMetrics { @@ -45,6 +47,18 @@ impl Default for SearchMetrics { "search", exponential_buckets(0.005, 2.0, 10).unwrap(), ), + job_assigned_total: new_counter( + "job_assigned_total", + "Number of job assigned to searchers", + "search", + &[], + ), + job_assigned_to_affinity_searcher: new_counter( + "job_assigned_to_affinity_searcher", + "Number of job assigned to the searcher of highest affinity", + "search", + &[], + ), } } } diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index 24c6677d0ee..fcf6b96ee27 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -29,7 +29,7 @@ use quickwit_common::pubsub::EventSubscriber; use quickwit_common::rendezvous_hasher::{node_affinity, sort_by_rendez_vous_hash}; use quickwit_proto::search::{ReportSplit, ReportSplitsRequest}; -use crate::{SearchJob, SearchServiceClient, SearcherPool}; +use crate::{SearchJob, SearchServiceClient, SearcherPool, SEARCH_METRICS}; /// Job. /// The unit in which distributed search is performed. @@ -181,10 +181,22 @@ impl SearchJobPlacer { sort_by_rendez_vous_hash(&mut candidate_nodes, job.split_id()); // Select the least loaded node. let chosen_node_idx = if candidate_nodes.len() >= 2 { + // TODO: we currently give little flexibility. By allowing node 0 to be slighly + // more loaded, we could improve the chance of picking the same node for multiple + // queries, without impacting load balancing much. + // for instance allowing node 0 to have sum(job.cost)/count(searcher) * 0.05 of + // load more than node 1 seems to improve choosing the 1st node in simulations, + // while not overloading a searcher by more than 5% over what a simple power of two + // choices would usize::from(candidate_nodes[0].load > candidate_nodes[1].load) } else { 0 }; + if chosen_node_idx == 0 { + SEARCH_METRICS.job_assigned_to_affinity_searcher.inc(); + } + SEARCH_METRICS.job_assigned_total.inc(); + let chosen_node = &mut candidate_nodes[chosen_node_idx]; chosen_node.load += job.cost(); From 700fcfed32427c5b7f5dd2450d1ee5cedcadaf3b Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Mon, 27 May 2024 15:10:12 +0200 Subject: [PATCH 2/3] rename metric and remove comment about job assignment. it was made to create a discussion, and the discussion now exists --- quickwit/quickwit-search/src/metrics.rs | 2 +- quickwit/quickwit-search/src/search_job_placer.rs | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 9928756982b..9bdecbee39d 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -54,7 +54,7 @@ impl Default for SearchMetrics { &[], ), job_assigned_to_affinity_searcher: new_counter( - "job_assigned_to_affinity_searcher", + "job_assigned_to_affinity_searcher_total", "Number of job assigned to the searcher of highest affinity", "search", &[], diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index fcf6b96ee27..65f556d9754 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -181,13 +181,6 @@ impl SearchJobPlacer { sort_by_rendez_vous_hash(&mut candidate_nodes, job.split_id()); // Select the least loaded node. let chosen_node_idx = if candidate_nodes.len() >= 2 { - // TODO: we currently give little flexibility. By allowing node 0 to be slighly - // more loaded, we could improve the chance of picking the same node for multiple - // queries, without impacting load balancing much. - // for instance allowing node 0 to have sum(job.cost)/count(searcher) * 0.05 of - // load more than node 1 seems to improve choosing the 1st node in simulations, - // while not overloading a searcher by more than 5% over what a simple power of two - // choices would usize::from(candidate_nodes[0].load > candidate_nodes[1].load) } else { 0 From 38f2a58541ae2b4571b7d3d2303ec7e7cf450141 Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Wed, 29 May 2024 19:01:54 +0200 Subject: [PATCH 3/3] use single metric with label --- quickwit/quickwit-search/src/metrics.rs | 17 ++++++----------- .../quickwit-search/src/search_job_placer.rs | 13 +++++++++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 9bdecbee39d..449b747f162 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -21,14 +21,14 @@ use once_cell::sync::Lazy; use quickwit_common::metrics::{ - exponential_buckets, new_counter, new_histogram, Histogram, IntCounter, + exponential_buckets, new_counter, new_counter_vec, new_histogram, Histogram, IntCounter, + IntCounterVec, }; pub struct SearchMetrics { pub leaf_searches_splits_total: IntCounter, pub leaf_search_split_duration_secs: Histogram, - pub job_assigned_total: IntCounter, - pub job_assigned_to_affinity_searcher: IntCounter, + pub job_assigned_total: IntCounterVec<1>, } impl Default for SearchMetrics { @@ -47,17 +47,12 @@ impl Default for SearchMetrics { "search", exponential_buckets(0.005, 2.0, 10).unwrap(), ), - job_assigned_total: new_counter( + job_assigned_total: new_counter_vec( "job_assigned_total", - "Number of job assigned to searchers", - "search", - &[], - ), - job_assigned_to_affinity_searcher: new_counter( - "job_assigned_to_affinity_searcher_total", - "Number of job assigned to the searcher of highest affinity", + "Number of job assigned to searchers, per affinity rank.", "search", &[], + ["affinity"], ), } } diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index 65f556d9754..dc708763ff1 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -185,10 +185,15 @@ impl SearchJobPlacer { } else { 0 }; - if chosen_node_idx == 0 { - SEARCH_METRICS.job_assigned_to_affinity_searcher.inc(); - } - SEARCH_METRICS.job_assigned_total.inc(); + let metric_node_idx = match chosen_node_idx { + 0 => "0", + 1 => "1", + _ => "> 1", + }; + SEARCH_METRICS + .job_assigned_total + .with_label_values([metric_node_idx]) + .inc(); let chosen_node = &mut candidate_nodes[chosen_node_idx]; chosen_node.load += job.cost();