Skip to content

Commit

Permalink
Adding some kind of throttling on the janitor to prevent it from
Browse files Browse the repository at this point in the history
hammering the metastore on airmail.
  • Loading branch information
fulmicoton committed Oct 22, 2024
1 parent 61483b9 commit d72c494
Showing 1 changed file with 20 additions and 4 deletions.
24 changes: 20 additions & 4 deletions quickwit/quickwit-index-management/src/garbage_collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use std::sync::{Arc, OnceLock};
use std::time::Duration;

use anyhow::Context;
Expand Down Expand Up @@ -295,6 +295,12 @@ async fn list_splits_metadata(
Ok(splits)
}

fn maximum_split_deletion_rate_per_sec() -> usize {
static MAXIMUM_SPLIT_DELETION_RATE_PER_SEC: std::sync::OnceLock<usize> = OnceLock::new();
*MAXIMUM_SPLIT_DELETION_RATE_PER_SEC
.get_or_init(|| quickwit_common::get_from_env("MAXIMUM_SPLIT_DELETION_RATE_PER_SEC", 300))
}

/// Removes any splits marked for deletion which haven't been
/// updated after `updated_before_timestamp` in batches of 1000 splits.
///
Expand Down Expand Up @@ -325,9 +331,11 @@ async fn delete_splits_marked_for_deletion_several_indexes(
.with_limit(DELETE_SPLITS_BATCH_SIZE)
.sort_by_index_uid();

let mut splits_to_delete_possibly_remaining = true;

while splits_to_delete_possibly_remaining {
loop {
let sleep_duration_secs: u64 =
DELETE_SPLITS_BATCH_SIZE.div_ceil(maximum_split_deletion_rate_per_sec()) as u64;
let sleep_future = tokio::time::sleep(Duration::from_secs(sleep_duration_secs));
let splits_metadata_to_delete: Vec<SplitMetadata> = match protect_future(
progress_opt,
list_splits_metadata(&metastore, &list_splits_query),
Expand All @@ -345,7 +353,7 @@ async fn delete_splits_marked_for_deletion_several_indexes(
// To detect if this is the last page, we check if the number of splits is less than the
// limit.
assert!(splits_metadata_to_delete.len() <= DELETE_SPLITS_BATCH_SIZE);
splits_to_delete_possibly_remaining =
let splits_to_delete_possibly_remaining: bool =
splits_metadata_to_delete.len() == DELETE_SPLITS_BATCH_SIZE;

// set split after which to search for the next loop
Expand Down Expand Up @@ -378,6 +386,14 @@ async fn delete_splits_marked_for_deletion_several_indexes(
&mut split_removal_info,
)
.await;

if !splits_to_delete_possibly_remaining {
// stop the gc if this was the last batch
// we are guaranteed to make progress due to .after_split()
break;
} else {
sleep_future.await;
}
}

split_removal_info
Expand Down

0 comments on commit d72c494

Please sign in to comment.