From 7fe406e5428baa9220e7ae162e2aa34315c76146 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Wed, 16 Oct 2024 18:22:03 +0900 Subject: [PATCH] Adding some kind of throttling on the janitor to prevent it from hammering the metastore on airmail. --- .../src/garbage_collection.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/quickwit/quickwit-index-management/src/garbage_collection.rs b/quickwit/quickwit-index-management/src/garbage_collection.rs index f5a7545c1f4..b3fc1ecb01d 100644 --- a/quickwit/quickwit-index-management/src/garbage_collection.rs +++ b/quickwit/quickwit-index-management/src/garbage_collection.rs @@ -19,7 +19,7 @@ use std::collections::{HashMap, HashSet}; use std::path::{Path, PathBuf}; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; use std::time::Duration; use anyhow::Context; @@ -299,6 +299,12 @@ async fn list_splits_metadata( Ok(splits) } +fn maximum_split_deletion_rate_per_sec() -> usize { + static MAXIMUM_SPLIT_DELETION_RATE_PER_SEC: std::sync::OnceLock = OnceLock::new(); + *MAXIMUM_SPLIT_DELETION_RATE_PER_SEC + .get_or_init(|| quickwit_common::get_from_env("MAXIMUM_SPLIT_DELETION_RATE_PER_SEC", 300)) +} + /// Removes any splits marked for deletion which haven't been /// updated after `updated_before_timestamp` in batches of 1000 splits. /// @@ -329,6 +335,9 @@ async fn delete_splits_marked_for_deletion_several_indexes( .sort_by_index_uid(); loop { + let sleep_duration_secs: u64 = + DELETE_SPLITS_BATCH_SIZE.div_ceil(maximum_split_deletion_rate_per_sec()) as u64; + let sleep_future = tokio::time::sleep(Duration::from_secs(sleep_duration_secs)); let splits_metadata_to_delete: Vec = match protect_future( progress_opt, list_splits_metadata(&metastore, &list_splits_query), @@ -372,6 +381,8 @@ async fn delete_splits_marked_for_deletion_several_indexes( // stop the gc if this was the last batch // we are guaranteed to make progress due to .after_split() break; + } else { + sleep_future.await; } }