Some sanity rewrite of the LocalSplitStore #5603

Open · wants to merge 3 commits into main
8 changes: 4 additions & 4 deletions docs/deployment/cluster-sizing.md
@@ -29,10 +29,10 @@ Here are some high-level guidelines to size your Indexer nodes:
 <!-- TODO: revisit this when cooperative indexing becomes the default -->
 - Don't use instances with less than 8GB of RAM
   <!-- Note: 2GB for the heap size (per pipeline) and 2GB for ingest queues -->
-- Mount the data directory to a volume of at least 110GB to store the [split
-  cache](../configuration/node-config.md#Indexer-configuration) and the [ingest
-  queue](../configuration/node-config.md#ingest-api-configuration).
-  <!-- Note: 4GB max_queue_disk_usage and 100GB split_store_max_num_bytes -->
+- Mount the data directory to a volume of at least 120GB to store
+  - the [split cache](../configuration/node-config.md#Indexer-configuration) (default 100GB)
+  - the [ingest queue](../configuration/node-config.md#ingest-api-configuration) (default 4GiB)
+  - a little extra for the indexes that are being built (first generation and merges)
 - Local SSDs are preferred for deploying Indexers since they generally provide the best performance per dollar and save some network bandwidth. However, remote disks can also be used if they provide roughly 20 MB/s of write throughput per core when using the ingest API, or 10 MB/s when relying on other sources. For Amazon EBS volumes, this is equivalent to 320 or 160 IOPS per core (assuming 64 KB IOPS).

 :::note
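For context, the 120GB figure is just the sum of the two defaults named in the new bullet list plus headroom for in-progress splits. A minimal node-config sketch, assuming the option names and default values documented in node-config.md:

```yaml
# Sketch of the node-config knobs behind the ~120GB sizing guideline.
# Values shown are the documented defaults:
# 100GB (split cache) + 4GiB (ingest queue) + headroom ≈ 120GB.
indexer:
  split_store_max_num_bytes: 100G   # cap on the local split cache
  split_store_max_num_splits: 1000  # split-count cap on the same cache
ingest_api:
  max_queue_disk_usage: 4GiB        # cap on the ingest queue
```

Raising `split_store_max_num_bytes` should go hand in hand with a larger data volume.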
10 changes: 5 additions & 5 deletions quickwit/quickwit-indexing/src/actors/indexing_service.rs
@@ -68,7 +68,7 @@ use super::{MergePlanner, MergeSchedulerService};
 use crate::actors::merge_pipeline::FinishPendingMergesAndShutdownPipeline;
 use crate::models::{DetachIndexingPipeline, DetachMergePipeline, ObservePipeline, SpawnPipeline};
 use crate::source::{AssignShards, Assignment};
-use crate::split_store::{LocalSplitStore, SplitStoreQuota};
+use crate::split_store::{IndexingSplitCache, SplitStoreQuota};
 use crate::{IndexingPipeline, IndexingPipelineParams, IndexingSplitStore, IndexingStatistics};

 /// Name of the indexing directory, usually located at `<data_dir_path>/indexing`.
@@ -113,7 +113,7 @@ pub struct IndexingService {
     storage_resolver: StorageResolver,
     indexing_pipelines: HashMap<PipelineUid, PipelineHandle>,
     counters: IndexingServiceCounters,
-    local_split_store: Arc<LocalSplitStore>,
+    local_split_store: Arc<IndexingSplitCache>,
     max_concurrent_split_uploads: usize,
     merge_pipeline_handles: HashMap<MergePipelineId, MergePipelineHandle>,
     cooperative_indexing_permits: Option<Arc<Semaphore>>,
@@ -147,15 +147,15 @@ impl IndexingService {
         storage_resolver: StorageResolver,
         event_broker: EventBroker,
     ) -> anyhow::Result<IndexingService> {
-        let split_store_space_quota = SplitStoreQuota::new(
+        let split_store_space_quota = SplitStoreQuota::try_new(
             indexer_config.split_store_max_num_splits,
             indexer_config.split_store_max_num_bytes,
-        );
+        )?;
         let merge_io_throughput_limiter_opt =
             indexer_config.max_merge_write_throughput.map(io::limiter);
         let split_cache_dir_path = get_cache_directory_path(&data_dir_path);
         let local_split_store =
-            IndexingSplitCache::open(split_cache_dir_path, split_store_space_quota).await?;
+            IndexingSplitCache::open(split_cache_dir_path, split_store_space_quota).await?;
         let indexing_root_directory =
             temp_dir::create_or_purge_directory(&data_dir_path.join(INDEXING_DIR_NAME)).await?;
         let queue_dir_path = data_dir_path.join(QUEUES_DIR_NAME);
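Besides the `LocalSplitStore` → `IndexingSplitCache` rename, the switch from `SplitStoreQuota::new` to `SplitStoreQuota::try_new` (note the `?` at the call site) makes building the quota fallible, so a bad quota configuration is rejected when the indexing service starts rather than surfacing later inside the cache. The new constructor body isn't shown in this hunk; a minimal sketch of what such a fallible constructor could look like, with the exact validation rule and field names being assumptions:

```rust
use anyhow::{ensure, Result};
use bytesize::ByteSize;

/// Disk-usage quota for the indexing split cache (sketch; fields assumed).
pub struct SplitStoreQuota {
    max_num_splits: usize,
    max_num_bytes: ByteSize,
}

impl SplitStoreQuota {
    /// Fallible constructor: rejects quotas that would make the cache
    /// unusable instead of silently accepting them.
    pub fn try_new(max_num_splits: usize, max_num_bytes: ByteSize) -> Result<Self> {
        ensure!(
            max_num_splits > 0,
            "split store quota must allow at least one split"
        );
        Ok(Self {
            max_num_splits,
            max_num_bytes,
        })
    }
}
```

Validating at construction keeps every downstream consumer of the quota free of defensive checks, which fits the "sanity rewrite" intent of the PR title.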