From d2deae6014bba06e83389150bf3c53a037cb5e2e Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Fri, 7 Jun 2024 15:30:56 +0200 Subject: [PATCH 1/2] update tantivy --- quickwit/Cargo.lock | 89 +++------ quickwit/Cargo.toml | 2 +- .../src/default_doc_mapper/default_mapper.rs | 182 ++++-------------- .../src/default_doc_mapper/mapping_tree.rs | 14 +- quickwit/quickwit-search/src/fetch_docs.rs | 5 +- 5 files changed, 75 insertions(+), 217 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index c7eb4ef1f3a..03d32525198 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -2603,19 +2603,6 @@ dependencies = [ "slab", ] -[[package]] -name = "generator" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" -dependencies = [ - "cc", - "libc", - "log", - "rustversion", - "windows 0.48.0", -] - [[package]] name = "generator" version = "0.8.1" @@ -2627,7 +2614,7 @@ dependencies = [ "libc", "log", "rustversion", - "windows 0.54.0", + "windows", ] [[package]] @@ -3377,6 +3364,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -3914,20 +3910,6 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" -[[package]] -name = "loom" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" -dependencies = [ - "cfg-if", - "generator 0.7.5", - "pin-utils", - "scoped-tls", - "tracing", - "tracing-subscriber", -] - [[package]] name = "loom" version = "0.7.2" @@ -3935,7 +3917,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" dependencies = [ "cfg-if", - "generator 0.8.1", + "generator", "pin-utils", "scoped-tls", "tracing", @@ -4438,21 +4420,13 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "oneshot" -version = "0.1.6" -source = "git+https://github.com/fulmicoton/oneshot.git?rev=c10a3ba#c10a3ba32adc189acf68acd579ba9755075ecb4d" -dependencies = [ - "loom 0.5.6", -] - [[package]] name = "oneshot" version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071d1cf3298ad8e543dca18217d198cb6a3884443d204757b9624b935ef09fa0" dependencies = [ - "loom 0.7.2", + "loom", ] [[package]] @@ -4780,7 +4754,7 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "ownedbytes" version = "0.7.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=5b7cca1#5b7cca13e5136c7e3b86f645be38b08bed2d6f78" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" dependencies = [ "stable_deref_trait", ] @@ -6018,7 +5992,7 @@ dependencies = [ "libz-sys", "mockall", "once_cell", - "oneshot 0.1.7", + "oneshot", "openssl", "proptest", "prost", @@ -7683,9 +7657,9 @@ checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" [[package]] name = "sketches-ddsketch" -version = "0.2.2" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c" +checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" dependencies = [ "serde", ] @@ -8182,7 +8156,7 @@ dependencies = [ [[package]] name = "tantivy" version = "0.23.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=5b7cca1#5b7cca13e5136c7e3b86f645be38b08bed2d6f78" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" dependencies = [ "aho-corasick", "arc-swap", @@ -8198,7 +8172,7 @@ dependencies = [ "fs4", "futures-util", "htmlescape", - "itertools 0.12.1", + "itertools 0.13.0", "levenshtein_automata", "log", "lru", @@ -8206,7 +8180,7 @@ dependencies = [ "measure_time", "memmap2", "once_cell", - "oneshot 0.1.6", + "oneshot", "rayon", "regex", "rust-stemmers", @@ -8234,7 +8208,7 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=5b7cca1#5b7cca13e5136c7e3b86f645be38b08bed2d6f78" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" dependencies = [ "bitpacking", ] @@ -8242,11 +8216,11 @@ dependencies = [ [[package]] name = "tantivy-columnar" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=5b7cca1#5b7cca13e5136c7e3b86f645be38b08bed2d6f78" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" dependencies = [ "downcast-rs", "fastdivide", - "itertools 0.12.1", + "itertools 0.13.0", "serde", "tantivy-bitpacker", "tantivy-common", @@ -8257,7 +8231,7 @@ dependencies = [ [[package]] name = "tantivy-common" version = "0.7.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=5b7cca1#5b7cca13e5136c7e3b86f645be38b08bed2d6f78" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" dependencies = [ "async-trait", "byteorder", @@ -8280,7 +8254,7 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" version = "0.22.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=5b7cca1#5b7cca13e5136c7e3b86f645be38b08bed2d6f78" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" dependencies = [ "nom", ] @@ -8288,7 +8262,7 @@ dependencies = [ [[package]] name = "tantivy-sstable" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=5b7cca1#5b7cca13e5136c7e3b86f645be38b08bed2d6f78" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" dependencies = [ "tantivy-bitpacker", "tantivy-common", @@ -8299,7 +8273,7 @@ dependencies = [ [[package]] name = "tantivy-stacker" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=5b7cca1#5b7cca13e5136c7e3b86f645be38b08bed2d6f78" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" dependencies = [ "murmurhash32", "rand_distr", @@ -8309,7 +8283,7 @@ dependencies = [ [[package]] name = "tantivy-tokenizer-api" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=5b7cca1#5b7cca13e5136c7e3b86f645be38b08bed2d6f78" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" dependencies = [ "serde", ] @@ -9559,15 +9533,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" -dependencies = [ - "windows-targets 0.48.5", -] - [[package]] name = "windows" version = "0.54.0" diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 84919ccb453..8871f2540bc 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -323,7 +323,7 @@ quickwit-serve = { path = "quickwit-serve" } quickwit-storage = { path = "quickwit-storage" } quickwit-telemetry = { path = "quickwit-telemetry" } -tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "5b7cca1", default-features = false, features = [ +tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "93ff736", default-features = false, features = [ "lz4-compression", "mmap", "quickwit", diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs index 537ba1f460e..d2d4f322944 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/default_mapper.rs @@ -29,8 +29,9 @@ use quickwit_query::tokenizers::TokenizerManager; use serde::{Deserialize, Serialize}; use serde_json::{self, Value as JsonValue}; use tantivy::query::Query; +use tantivy::schema::document::{ReferenceValue, ReferenceValueLeaf}; use tantivy::schema::{ - Field, FieldType, FieldValue, OwnedValue as TantivyValue, Schema, INDEXED, STORED, + Field, FieldType, OwnedValue as TantivyValue, Schema, Value, INDEXED, STORED, }; use tantivy::TantivyDocument as Document; @@ -446,27 +447,18 @@ fn tantivy_value_to_json(val: TantivyValue) -> JsonValue { } #[inline] -fn populate_field_presence_for_json_value( - json_value: &TantivyValue, +fn populate_field_presence_for_json_value<'a>( + json_value: impl Value<'a>, path_hasher: &PathHasher, is_expand_dots_enabled: bool, output: &mut FnvHashSet, ) { - match json_value { - TantivyValue::Null => {} - TantivyValue::Bool(_) - | TantivyValue::F64(_) - | TantivyValue::I64(_) - | TantivyValue::U64(_) - | TantivyValue::PreTokStr(_) - | TantivyValue::Date(_) - | TantivyValue::Facet(_) - | TantivyValue::Bytes(_) - | TantivyValue::IpAddr(_) - | TantivyValue::Str(_) => { + match json_value.as_value() { + ReferenceValue::Leaf(ReferenceValueLeaf::Null) => {} + ReferenceValue::Leaf(_) => { output.insert(path_hasher.finish()); } - TantivyValue::Array(items) => { + ReferenceValue::Array(items) => { for item in items { populate_field_presence_for_json_value( item, @@ -476,7 +468,7 @@ fn populate_field_presence_for_json_value( ); } } - TantivyValue::Object(json_obj) => { + ReferenceValue::Object(json_obj) => { populate_field_presence_for_json_obj( json_obj, path_hasher.clone(), @@ -487,8 +479,8 @@ fn populate_field_presence_for_json_value( } } -fn populate_field_presence_for_json_obj( - json_obj: &[(String, TantivyValue)], +fn populate_field_presence_for_json_obj<'a, Iter: Iterator)>>( + json_obj: Iter, path_hasher: PathHasher, is_expand_dots_enabled: bool, output: &mut FnvHashSet, @@ -511,49 +503,6 @@ fn populate_field_presence_for_json_obj( } } -fn zip_cloneable, U: Clone>(iter: I, item: U) -> ZipCloneable { - let mut inner = iter.peekable(); - if inner.peek().is_some() { - ZipCloneable::Running { inner, item } - } else { - ZipCloneable::Ended - } -} - -/// An iterator which zip a value alongside another iterator, cloning it each time it yields, -/// except for the last iteration. -#[derive(Default, Debug)] -enum ZipCloneable, U: Clone> { - Running { - inner: std::iter::Peekable, - item: U, - }, - #[default] - Ended, -} - -impl, U: Clone> Iterator for ZipCloneable { - type Item = (T, U); - - fn next(&mut self) -> Option<(T, U)> { - match self { - ZipCloneable::Running { inner, item } => { - let current_value = inner.next()?; - if inner.peek().is_some() { - Some((current_value, item.clone())) - } else { - // we are in the latest iteration, take item so we don't clone it - let ZipCloneable::Running { item, .. } = std::mem::take(self) else { - unreachable!() - }; - Some((current_value, item)) - } - } - ZipCloneable::Ended => None, - } - } -} - #[typetag::serde(name = "default")] impl DocMapper for DefaultDocMapper { fn doc_from_json_obj( @@ -595,10 +544,8 @@ impl DocMapper for DefaultDocMapper { .flat_map(map_primitive_json_to_tantivy); for value in json_obj_values { - for (concatenate_dynamic_field, value) in - zip_cloneable(self.concatenate_dynamic_fields.iter(), value) - { - document.add_field_value(*concatenate_dynamic_field, value); + for concatenate_dynamic_field in self.concatenate_dynamic_fields.iter() { + document.add_field_value(*concatenate_dynamic_field, &value); } } } @@ -619,19 +566,17 @@ impl DocMapper for DefaultDocMapper { // The capacity is inexact here. if self.index_field_presence { - let mut field_presence_hashes: FnvHashSet = FnvHashSet::with_capacity_and_hasher( - document.field_values().len(), - Default::default(), - ); - for FieldValue { field, value } in document.field_values() { - let field_entry = self.schema.get_field_entry(*field); + let mut field_presence_hashes: FnvHashSet = + FnvHashSet::with_capacity_and_hasher(document.len(), Default::default()); + for (field, value) in document.field_values() { + let field_entry = self.schema.get_field_entry(field); if !field_entry.is_indexed() || field_entry.is_fast() { // We are using an tantivy's ExistsQuery for fast fields. continue; } let mut path_hasher: PathHasher = PathHasher::default(); path_hasher.append(&field.field_id().to_le_bytes()[..]); - if let TantivyValue::Object(json_obj) = value { + if let Some(json_obj) = value.as_object() { let is_expand_dots_enabled: bool = if let FieldType::JsonObject(json_options) = field_entry.field_type() { json_options.is_expand_dots_enabled() @@ -649,7 +594,7 @@ impl DocMapper for DefaultDocMapper { } } for field_presence_hash in field_presence_hashes { - document.add_field_value(FIELD_PRESENCE_FIELD, field_presence_hash); + document.add_field_value(FIELD_PRESENCE_FIELD, &field_presence_hash); } } Ok((partition, document)) @@ -719,14 +664,14 @@ impl DocMapper for DefaultDocMapper { mod tests { use std::collections::{HashMap, HashSet}; use std::iter::zip; - use std::sync::atomic::{AtomicUsize, Ordering}; - use std::sync::Arc; use itertools::Itertools; use quickwit_common::PathHasher; use quickwit_query::query_ast::query_ast_from_user_text; use serde_json::{self, json, Value as JsonValue}; - use tantivy::schema::{FieldType, IndexRecordOption, OwnedValue as TantivyValue, Type, Value}; + use tantivy::schema::{ + FieldType, IndexRecordOption, OwnedValue as TantivyValue, OwnedValue, Type, Value, + }; use super::DefaultDocMapper; use crate::default_doc_mapper::field_mapping_entry::DEFAULT_TOKENIZER_NAME; @@ -799,8 +744,9 @@ mod tests { let expected_json_paths_and_values: HashMap = serde_json::from_str(EXPECTED_JSON_PATHS_AND_VALUES).unwrap(); let mut field_presences: HashSet = HashSet::new(); - for field_value in document.field_values() { - let field_name = schema.get_field_name(field_value.field()); + for (field, value) in document.field_values() { + let owned_value: OwnedValue = value.into(); + let field_name = schema.get_field_name(field); if field_name == SOURCE_FIELD_NAME { // some part of aws-sdk enables `preserve_order` on serde_json. // to get "normal" equality, we are forced to recreate the json object @@ -813,19 +759,19 @@ mod tests { .sorted_by(|k1, k2| k1.0.cmp(&k2.0)) .collect::>(); assert_eq!( - tantivy::schema::OwnedValue::from(field_value.value().as_value()), + tantivy::schema::OwnedValue::from(value.as_value()), tantivy::schema::OwnedValue::from(sorted_json_values) ); } else if field_name == DYNAMIC_FIELD_NAME { assert_eq!( - serde_json::to_string(&field_value.value()).unwrap(), + serde_json::to_string(&owned_value).unwrap(), r#"{"response_date2":"2021-12-19T16:39:57Z"}"# ); } else if field_name == FIELD_PRESENCE_FIELD_NAME { - let field_presence_u64 = field_value.value().as_u64().unwrap(); + let field_presence_u64 = value.as_u64().unwrap(); field_presences.insert(field_presence_u64); } else { - let value = serde_json::to_string(field_value.value()).unwrap(); + let value = serde_json::to_string(&owned_value).unwrap(); let is_value_in_expected_values = expected_json_paths_and_values .get(field_name) .unwrap() @@ -1291,18 +1237,19 @@ mod tests { ) .unwrap(); let mut field_presences: HashSet = HashSet::default(); - document.field_values().iter().for_each(|field_value| { - let field_name = schema.get_field_name(field_value.field()); + document.field_values().for_each(|(field, value)| { + let owned_value: OwnedValue = value.into(); + let field_name = schema.get_field_name(field); if field_name == SOURCE_FIELD_NAME { assert_eq!( - tantivy::schema::OwnedValue::from(field_value.value().as_value()), + tantivy::schema::OwnedValue::from(value.as_value()), tantivy::schema::OwnedValue::from(json_doc_value.as_object().unwrap().clone()) ); } else if field_name == FIELD_PRESENCE_FIELD_NAME { - let field_value_hash = field_value.value().as_u64().unwrap(); + let field_value_hash = value.as_u64().unwrap(); field_presences.insert(field_value_hash); } else { - let value = serde_json::to_string(field_value.value()).unwrap(); + let value = serde_json::to_string(&owned_value).unwrap(); let is_value_in_expected_values = expected_json_paths_and_values .get(field_name) .unwrap() @@ -1610,11 +1557,11 @@ mod tests { let field = schema.get_field(field).unwrap(); let (_, doc) = default_doc_mapper.doc_from_json_str(document_json).unwrap(); - let values: Vec<&TantivyValue> = doc.get_all(field).collect(); + let values: Vec = doc.get_all(field).map(|value| value.into()).collect(); assert_eq!(values.len(), expected_values.len()); for (value, expected_value) in zip(values, expected_values) { - assert_eq!(*value, expected_value); + assert_eq!(value, expected_value); } } @@ -2396,59 +2343,4 @@ mod tests { ); } } - - struct CloneLimiter { - clone_left: Arc, - } - - impl Clone for CloneLimiter { - fn clone(&self) -> Self { - if self.clone_left.fetch_sub(1, Ordering::Relaxed) == 0 { - panic!("clone count exceeded"); - } - CloneLimiter { - clone_left: self.clone_left.clone(), - } - } - } - - impl CloneLimiter { - fn new(max_clone: usize) -> Self { - CloneLimiter { - clone_left: Arc::new(AtomicUsize::new(max_clone)), - } - } - } - - #[test] - #[should_panic(expected = "clone count exceeded")] - fn test_clone_limiter_panic() { - let limiter = CloneLimiter::new(1); - let _ = limiter.clone(); - let _ = limiter.clone(); - } - - #[test] - fn test_clone_limiter_doesnt_panic_early() { - let limiter = CloneLimiter::new(1); - let _ = limiter.clone(); - } - - #[test] - fn test_zip_cloneable() { - for (_val, _limiter) in super::zip_cloneable(std::iter::empty::<()>(), CloneLimiter::new(0)) - { - } - - for iter_len in 1..5 { - // to generate an iter with X items, we need only X-1 clone. In particular, for X=1, we - // don't need to clone - let limiter = CloneLimiter::new(iter_len - 1); - for ((val, _limiter), expected) in - super::zip_cloneable(0..iter_len, limiter).zip(0..iter_len) - { - assert_eq!(val, expected); - } - } - } } diff --git a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs index 857a71669e1..2892ce48cce 100644 --- a/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs +++ b/quickwit/quickwit-doc-mapper/src/default_doc_mapper/mapping_tree.rs @@ -318,7 +318,7 @@ impl MappingLeaf { .map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?; for concat_value in concat_values { for field in &self.concatenate { - document.add_field_value(*field, concat_value.clone()); + document.add_field_value(*field, &concat_value); } } } @@ -326,7 +326,7 @@ impl MappingLeaf { .typ .value_from_json(el_json_val) .map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?; - document.add_field_value(self.field, value); + document.add_field_value(self.field, &value); } return Ok(()); } @@ -338,7 +338,7 @@ impl MappingLeaf { .map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?; for concat_value in concat_values { for field in &self.concatenate { - document.add_field_value(*field, concat_value.clone()); + document.add_field_value(*field, &concat_value); } } } @@ -346,7 +346,7 @@ impl MappingLeaf { .typ .value_from_json(json_val) .map_err(|err_msg| DocParsingError::ValueError(path.join("."), err_msg))?; - document.add_field_value(self.field, value); + document.add_field_value(self.field, &value); Ok(()) } @@ -1293,7 +1293,7 @@ mod tests { assert_eq!(document.len(), 3); let values: Vec = document .get_all(field) - .flat_map(|val| (&val).as_bool()) + .flat_map(|val| val.as_bool()) .collect(); assert_eq!(&values, &[true, false, true]) } @@ -1345,7 +1345,7 @@ mod tests { assert_eq!(document.len(), 2); let values: Vec = document .get_all(field) - .flat_map(|val| (&val).as_i64()) + .flat_map(|val| val.as_i64()) .collect(); assert_eq!(&values, &[10i64, 20i64]); } @@ -1558,7 +1558,7 @@ mod tests { assert_eq!(document.len(), 2); let bytes_vec: Vec<&[u8]> = document .get_all(field) - .flat_map(|val| (&val).as_bytes()) + .flat_map(|val| val.as_bytes()) .collect(); assert_eq!( &bytes_vec[..], diff --git a/quickwit/quickwit-search/src/fetch_docs.rs b/quickwit/quickwit-search/src/fetch_docs.rs index ed6da6347aa..2873a5d2c41 100644 --- a/quickwit/quickwit-search/src/fetch_docs.rs +++ b/quickwit/quickwit-search/src/fetch_docs.rs @@ -29,7 +29,8 @@ use quickwit_proto::search::{ }; use quickwit_storage::Storage; use tantivy::query::Query; -use tantivy::schema::{Document as DocumentTrait, Field, OwnedValue, TantivyDocument, Value}; +use tantivy::schema::document::CompactDocValue; +use tantivy::schema::{Document as DocumentTrait, Field, TantivyDocument, Value}; use tantivy::snippet::SnippetGenerator; use tantivy::{ReloadPolicy, Score, Searcher, Term}; use tracing::{error, Instrument}; @@ -274,7 +275,7 @@ impl FieldsSnippetGenerator { fn snippets_from_field_values( &self, field_name: &str, - field_values: Vec<&OwnedValue>, + field_values: Vec>, ) -> Option> { if let Some(snippet_generator) = self.field_generators.get(field_name) { let values = field_values From 635793debac7c6e49a7789020c748ada6f1955df Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Mon, 10 Jun 2024 11:20:07 +0200 Subject: [PATCH 2/2] fix qp deescaping wildcards --- quickwit/Cargo.lock | 18 +++++++++--------- quickwit/Cargo.toml | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 03d32525198..8d433efbc8f 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -4754,7 +4754,7 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "ownedbytes" version = "0.7.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=08b9fc0#08b9fc0b3114640ad06c2358c404c474a9eea3c1" dependencies = [ "stable_deref_trait", ] @@ -8156,7 +8156,7 @@ dependencies = [ [[package]] name = "tantivy" version = "0.23.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=08b9fc0#08b9fc0b3114640ad06c2358c404c474a9eea3c1" dependencies = [ "aho-corasick", "arc-swap", @@ -8208,7 +8208,7 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=08b9fc0#08b9fc0b3114640ad06c2358c404c474a9eea3c1" dependencies = [ "bitpacking", ] @@ -8216,7 +8216,7 @@ dependencies = [ [[package]] name = "tantivy-columnar" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=08b9fc0#08b9fc0b3114640ad06c2358c404c474a9eea3c1" dependencies = [ "downcast-rs", "fastdivide", @@ -8231,7 +8231,7 @@ dependencies = [ [[package]] name = "tantivy-common" version = "0.7.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=08b9fc0#08b9fc0b3114640ad06c2358c404c474a9eea3c1" dependencies = [ "async-trait", "byteorder", @@ -8254,7 +8254,7 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" version = "0.22.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=08b9fc0#08b9fc0b3114640ad06c2358c404c474a9eea3c1" dependencies = [ "nom", ] @@ -8262,7 +8262,7 @@ dependencies = [ [[package]] name = "tantivy-sstable" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=08b9fc0#08b9fc0b3114640ad06c2358c404c474a9eea3c1" dependencies = [ "tantivy-bitpacker", "tantivy-common", @@ -8273,7 +8273,7 @@ dependencies = [ [[package]] name = "tantivy-stacker" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=08b9fc0#08b9fc0b3114640ad06c2358c404c474a9eea3c1" dependencies = [ "murmurhash32", "rand_distr", @@ -8283,7 +8283,7 @@ dependencies = [ [[package]] name = "tantivy-tokenizer-api" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=93ff736#93ff7365b05793b43903a64203ea7e8c335339b5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=08b9fc0#08b9fc0b3114640ad06c2358c404c474a9eea3c1" dependencies = [ "serde", ] diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 8871f2540bc..50b7cda7d65 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -323,7 +323,7 @@ quickwit-serve = { path = "quickwit-serve" } quickwit-storage = { path = "quickwit-storage" } quickwit-telemetry = { path = "quickwit-telemetry" } -tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "93ff736", default-features = false, features = [ +tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "08b9fc0", default-features = false, features = [ "lz4-compression", "mmap", "quickwit",