diff --git a/Cargo.toml b/Cargo.toml index 0e4457b832..a67d9d8b4b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -116,6 +116,11 @@ unstable = [] # useful for benches. quickwit = ["sstable", "futures-util"] +# Compares only the hash of a string when indexing data. +# Increases indexing speed, but in the extremely rare event of a hash collision, a term may be missing from the index. +# Uses 64-bit ahash. +compare_hash_only = ["stacker/compare_hash_only"] + [workspace] members = ["query-grammar", "bitpacker", "common", "ownedbytes", "stacker", "sstable", "tokenizer-api", "columnar"] diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index 180d5a66d8..1888f3b47e 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -495,7 +495,6 @@ mod tests { use tempfile::TempDir; - use super::compute_initial_table_size; use crate::collector::{Count, TopDocs}; use crate::core::json_utils::JsonTermWriter; use crate::directory::RamDirectory; @@ -516,7 +515,9 @@ mod tests { }; #[test] + #[cfg(not(feature = "compare_hash_only"))] fn test_hashmap_size() { + use super::compute_initial_table_size; assert_eq!(compute_initial_table_size(100_000).unwrap(), 1 << 12); assert_eq!(compute_initial_table_size(1_000_000).unwrap(), 1 << 15); assert_eq!(compute_initial_table_size(15_000_000).unwrap(), 1 << 19);