From 7be4105ef7632461630cd940ea070f1975c6fe68 Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Tue, 17 Dec 2024 17:15:13 +0100 Subject: [PATCH] Support json FAST and object fields --- quickwit/Cargo.lock | 148 ++++++++++-------- quickwit/Cargo.toml | 2 +- .../quickwit-doc-mapper/src/doc_mapper/mod.rs | 46 ++++-- quickwit/quickwit-doc-mapper/src/lib.rs | 6 +- .../quickwit-doc-mapper/src/query_builder.rs | 100 ++++++++---- .../src/query_ast/field_presence.rs | 84 +++++++--- .../src/query_ast/full_text_query.rs | 3 +- .../src/query_ast/phrase_prefix_query.rs | 5 +- .../src/query_ast/range_query.rs | 6 +- .../quickwit-query/src/query_ast/utils.rs | 46 ++++-- .../src/query_ast/wildcard_query.rs | 7 +- quickwit/quickwit-search/src/collector.rs | 11 +- quickwit/quickwit-search/src/leaf.rs | 22 ++- quickwit/quickwit-search/src/root.rs | 2 +- .../quickwit-search/src/search_stream/leaf.rs | 23 ++- .../qw_search_api/0004_exists_search.yaml | 55 ++++--- .../qw_search_api/_setup.quickwit.yaml | 11 +- 17 files changed, 376 insertions(+), 201 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 27411942af1..0fa6c4bd34e 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -679,7 +679,7 @@ dependencies = [ "regex-lite", "roxmltree 0.14.1", "serde_json", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1052,7 +1052,7 @@ dependencies = [ "miniserde", "peakmem-alloc", "perf-event", - "rustc-hash 2.0.0", + "rustc-hash", "rustop", "unicode-width 0.1.14", "yansi", @@ -2908,7 +2908,7 @@ dependencies = [ "reqwest", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tracing", @@ -2923,7 +2923,7 @@ checksum = "f8bdaaa4bc036e8318274d1b25f0f2265b3e95418b765fd1ea1c7ef938fd69bd" dependencies = [ "google-cloud-token", "http 0.2.12", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-retry", "tonic", @@ -2949,7 +2949,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "96e4ad0802d3f416f62e7ce01ac1460898ee0efc98f8b45cd4aab7611607012f" dependencies = [ "reqwest", - "thiserror", + "thiserror 1.0.69", "tokio", ] @@ -2966,7 +2966,7 @@ dependencies = [ "google-cloud-googleapis", "google-cloud-token", "prost-types 0.11.9", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-util", "tracing", @@ -3723,9 +3723,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" dependencies = [ "cfg-if", - "js-sys", - "wasm-bindgen", - "web-sys", ] [[package]] @@ -4139,7 +4136,7 @@ dependencies = [ "log", "once_cell", "serde", - "thiserror", + "thiserror 1.0.69", "yada", ] @@ -4419,11 +4416,10 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "measure_time" -version = "0.8.3" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbefd235b0aadd181626f281e1d684e116972988c14c264e42069d5e8a5775cc" +checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" dependencies = [ - "instant", "log", ] @@ -4553,7 +4549,7 @@ dependencies = [ "rustc_version", "smallvec", "tagptr", - "thiserror", + "thiserror 1.0.69", "triomphe", "uuid", ] @@ -4567,7 +4563,7 @@ dependencies = [ "crc32fast", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tracing", ] @@ -4863,7 +4859,7 @@ dependencies = [ "serde_json", "serde_path_to_error", "sha2", - "thiserror", + "thiserror 1.0.69", "url", ] @@ -4999,7 +4995,7 @@ dependencies = [ "serde_with", "sha2", "subtle", - "thiserror", + "thiserror 1.0.69", "url", ] @@ -5096,7 +5092,7 @@ dependencies = [ "opentelemetry_sdk", "prost 0.11.9", "reqwest", - "thiserror", + "thiserror 1.0.69", "tokio", "tonic", ] @@ -5134,7 +5130,7 @@ dependencies = [ "js-sys", "once_cell", "pin-project-lite", - "thiserror", + "thiserror 1.0.69", "urlencoding", ] @@ -5156,7 +5152,7 @@ dependencies = [ "rand 0.8.5", "regex", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", ] @@ -5238,7 +5234,7 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "ownedbytes" version = "0.7.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=2f2db16#2f2db16ec10656f9a7ef37018d38e0c6fb5edbe5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=1a25b6125#1a25b612507ba380d81a648c68bd3d4a3bb6f37d" dependencies = [ "stable_deref_trait", ] @@ -5440,7 +5436,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" dependencies = [ "memchr", - "thiserror", + "thiserror 1.0.69", "ucd-trie", ] @@ -5793,7 +5789,7 @@ dependencies = [ "smallvec", "symbolic-demangle", "tempfile", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -5996,7 +5992,7 @@ dependencies = [ "parking_lot", "procfs", "protobuf", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -6282,7 +6278,7 @@ dependencies = [ "serde", "serde_json", "sync_wrapper", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", ] @@ -6346,7 +6342,7 @@ dependencies = [ "serde_json", "tabled", "tempfile", - "thiserror", + "thiserror 1.0.69", "thousands", "tikv-jemalloc-ctl", "tikv-jemallocator", @@ -6416,7 +6412,7 @@ dependencies = [ "quickwit-common", "quickwit-proto", "serde", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tonic", @@ -6454,7 +6450,7 @@ dependencies = [ "serde_json", "siphasher", "tempfile", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-metrics", "tokio-stream", @@ -6585,7 +6581,7 @@ dependencies = [ "serde_yaml", "siphasher", "tantivy", - "thiserror", + "thiserror 1.0.69", "time", "tracing", "utoipa", @@ -6605,7 +6601,7 @@ dependencies = [ "quickwit-metastore", "quickwit-proto", "quickwit-storage", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tracing", @@ -6663,7 +6659,7 @@ dependencies = [ "serde_json", "tantivy", "tempfile", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tracing", @@ -6701,7 +6697,7 @@ dependencies = [ "serde_json", "serde_json_borrow", "tempfile", - "thiserror", + "thiserror 1.0.69", "tokio", "tonic", "tower", @@ -6796,7 +6792,7 @@ dependencies = [ "serde_json", "tantivy", "tempfile", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tracing", @@ -6890,7 +6886,7 @@ dependencies = [ "serial_test", "sqlx", "tempfile", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tokio-stream", @@ -6917,7 +6913,7 @@ dependencies = [ "quickwit-proto", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tonic", @@ -6948,7 +6944,7 @@ dependencies = [ "serde", "serde_json", "sqlx", - "thiserror", + "thiserror 1.0.69", "tokio", "tonic", "tonic-build", @@ -6979,7 +6975,7 @@ dependencies = [ "serde_json", "serde_with", "tantivy", - "thiserror", + "thiserror 1.0.69", "time", "whichlang", ] @@ -7001,7 +6997,7 @@ dependencies = [ "reqwest", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", "wiremock", @@ -7041,7 +7037,7 @@ dependencies = [ "serde", "serde_json", "tantivy", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tower", @@ -7104,7 +7100,7 @@ dependencies = [ "serde_qs 0.12.0", "serde_with", "tempfile", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tokio-stream", @@ -7156,7 +7152,7 @@ dependencies = [ "serde_json", "tantivy", "tempfile", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tokio-util", @@ -7375,7 +7371,7 @@ checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom 0.2.15", "libredox", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -7709,12 +7705,6 @@ version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - [[package]] name = "rustc-hash" version = "2.0.0" @@ -7944,7 +7934,7 @@ dependencies = [ "proc-macro2", "quote", "syn 2.0.89", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -8106,7 +8096,7 @@ checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" dependencies = [ "percent-encoding", "serde", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -8118,7 +8108,7 @@ dependencies = [ "futures", "percent-encoding", "serde", - "thiserror", + "thiserror 1.0.69", "tracing", "warp", ] @@ -8330,7 +8320,7 @@ checksum = "adc4e5204eb1910f40f9cfa375f6f05b68c3abac4b6fd879c8ff5e7ae8a0a085" dependencies = [ "num-bigint", "num-traits", - "thiserror", + "thiserror 1.0.69", "time", ] @@ -8515,7 +8505,7 @@ dependencies = [ "sha2", "smallvec", "sqlformat", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tokio-stream", @@ -8600,7 +8590,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 1.0.69", "time", "tracing", "whoami", @@ -8639,7 +8629,7 @@ dependencies = [ "smallvec", "sqlx-core", "stringprep", - "thiserror", + "thiserror 1.0.69", "time", "tracing", "whoami", @@ -8860,7 +8850,7 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" [[package]] name = "tantivy" version = "0.23.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=2f2db16#2f2db16ec10656f9a7ef37018d38e0c6fb5edbe5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=1a25b6125#1a25b612507ba380d81a648c68bd3d4a3bb6f37d" dependencies = [ "aho-corasick", "arc-swap", @@ -8889,7 +8879,7 @@ dependencies = [ "rayon", "regex", "rust-stemmers", - "rustc-hash 1.1.0", + "rustc-hash", "serde", "serde_json", "sketches-ddsketch", @@ -8903,7 +8893,7 @@ dependencies = [ "tantivy-stacker", "tantivy-tokenizer-api", "tempfile", - "thiserror", + "thiserror 2.0.6", "time", "uuid", "winapi 0.3.9", @@ -8913,7 +8903,7 @@ dependencies = [ [[package]] name = "tantivy-bitpacker" version = "0.6.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=2f2db16#2f2db16ec10656f9a7ef37018d38e0c6fb5edbe5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=1a25b6125#1a25b612507ba380d81a648c68bd3d4a3bb6f37d" dependencies = [ "bitpacking", ] @@ -8921,7 +8911,7 @@ dependencies = [ [[package]] name = "tantivy-columnar" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=2f2db16#2f2db16ec10656f9a7ef37018d38e0c6fb5edbe5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=1a25b6125#1a25b612507ba380d81a648c68bd3d4a3bb6f37d" dependencies = [ "downcast-rs", "fastdivide", @@ -8936,7 +8926,7 @@ dependencies = [ [[package]] name = "tantivy-common" version = "0.7.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=2f2db16#2f2db16ec10656f9a7ef37018d38e0c6fb5edbe5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=1a25b6125#1a25b612507ba380d81a648c68bd3d4a3bb6f37d" dependencies = [ "async-trait", "byteorder", @@ -8959,7 +8949,7 @@ dependencies = [ [[package]] name = "tantivy-query-grammar" version = "0.22.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=2f2db16#2f2db16ec10656f9a7ef37018d38e0c6fb5edbe5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=1a25b6125#1a25b612507ba380d81a648c68bd3d4a3bb6f37d" dependencies = [ "nom", ] @@ -8967,7 +8957,7 @@ dependencies = [ [[package]] name = "tantivy-sstable" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=2f2db16#2f2db16ec10656f9a7ef37018d38e0c6fb5edbe5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=1a25b6125#1a25b612507ba380d81a648c68bd3d4a3bb6f37d" dependencies = [ "tantivy-bitpacker", "tantivy-common", @@ -8978,7 +8968,7 @@ dependencies = [ [[package]] name = "tantivy-stacker" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=2f2db16#2f2db16ec10656f9a7ef37018d38e0c6fb5edbe5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=1a25b6125#1a25b612507ba380d81a648c68bd3d4a3bb6f37d" dependencies = [ "murmurhash32", "rand_distr", @@ -8988,7 +8978,7 @@ dependencies = [ [[package]] name = "tantivy-tokenizer-api" version = "0.3.0" -source = "git+https://github.com/quickwit-oss/tantivy/?rev=2f2db16#2f2db16ec10656f9a7ef37018d38e0c6fb5edbe5" +source = "git+https://github.com/quickwit-oss/tantivy/?rev=1a25b6125#1a25b612507ba380d81a648c68bd3d4a3bb6f37d" dependencies = [ "serde", ] @@ -9055,7 +9045,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47" +dependencies = [ + "thiserror-impl 2.0.6", ] [[package]] @@ -9069,6 +9068,17 @@ dependencies = [ "syn 2.0.89", ] +[[package]] +name = "thiserror-impl" +version = "2.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "thousands" version = "0.2.0" @@ -9145,7 +9155,7 @@ version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78bfd61bca99323ce96911bd2c443259115460615e44f1d449cee8cb3831a1dd" dependencies = [ - "thiserror", + "thiserror 1.0.69", "time", ] @@ -9607,7 +9617,7 @@ dependencies = [ "log", "rand 0.8.5", "sha1", - "thiserror", + "thiserror 1.0.69", "url", "utf-8", ] @@ -9942,7 +9952,7 @@ dependencies = [ "strip-ansi-escapes", "syslog_loose", "termcolor", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", "uaparser", diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 9c91d6efd58..8cd422d66a2 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -328,7 +328,7 @@ quickwit-serve = { path = "quickwit-serve" } quickwit-storage = { path = "quickwit-storage" } quickwit-telemetry = { path = "quickwit-telemetry" } -tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "2f2db16", default-features = false, features = [ +tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "1a25b6125", default-features = false, features = [ "lz4-compression", "mmap", "quickwit", diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs index 146c2f1f51c..c0aaf27d487 100644 --- a/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs +++ b/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs @@ -85,6 +85,15 @@ pub struct TermRange { pub limit: Option, } +/// Description of how a fast field should be warmed up +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FastFieldWarmupInfo { + /// Name of the fast field + pub name: String, + /// Whether subfields should also be loaded for warmup + pub with_subfields: bool, +} + /// Information about what a DocMapper think should be warmed up before /// running the query. #[derive(Debug, Default, Clone, PartialEq, Eq)] @@ -92,8 +101,8 @@ pub struct WarmupInfo { /// Name of fields from the term dictionary and posting list which needs to /// be entirely loaded pub term_dict_fields: HashSet, - /// Name of fast fields which needs to be loaded - pub fast_field_names: HashSet, + /// Fast fields which needs to be loaded + pub fast_fields: HashSet, /// Whether to warmup field norms. Used mostly for scoring. pub field_norms: bool, /// Terms to warmup, and whether their position is needed too. @@ -106,9 +115,18 @@ impl WarmupInfo { /// Merge other WarmupInfo into self. pub fn merge(&mut self, other: WarmupInfo) { self.term_dict_fields.extend(other.term_dict_fields); - self.fast_field_names.extend(other.fast_field_names); self.field_norms |= other.field_norms; + for fast_field_warmup_info in other.fast_fields.into_iter() { + // avoid overwriting with a less demanding warmup + if !self.fast_fields.contains(&FastFieldWarmupInfo { + name: fast_field_warmup_info.name.clone(), + with_subfields: true, + }) { + self.fast_fields.insert(fast_field_warmup_info); + } + } + for (field, term_and_pos) in other.terms_grouped_by_field.into_iter() { let sub_map = self.terms_grouped_by_field.entry(field).or_default(); @@ -571,8 +589,14 @@ mod tests { } } - fn hashset(elements: &[&str]) -> HashSet { - elements.iter().map(|elem| elem.to_string()).collect() + fn hashset_fast(elements: &[&str]) -> HashSet { + elements + .iter() + .map(|elem| FastFieldWarmupInfo { + name: elem.to_string(), + with_subfields: false, + }) + .collect() } fn hashset_field(elements: &[u32]) -> HashSet { @@ -617,7 +641,7 @@ mod tests { fn test_warmup_info_merge() { let wi_base = WarmupInfo { term_dict_fields: hashset_field(&[1, 2]), - fast_field_names: hashset(&["fast1", "fast2"]), + fast_fields: hashset_fast(&["fast1", "fast2"]), field_norms: false, terms_grouped_by_field: hashmap(&[(1, "term1", false), (1, "term2", false)]), term_ranges_grouped_by_field: hashmap_ranges(&[ @@ -634,7 +658,7 @@ mod tests { let mut wi_base = wi_base; let wi_2 = WarmupInfo { term_dict_fields: hashset_field(&[2, 3]), - fast_field_names: hashset(&["fast2", "fast3"]), + fast_fields: hashset_fast(&["fast2", "fast3"]), field_norms: true, terms_grouped_by_field: hashmap(&[(2, "term1", false), (1, "term2", true)]), term_ranges_grouped_by_field: hashmap_ranges(&[ @@ -646,8 +670,8 @@ mod tests { assert_eq!(wi_base.term_dict_fields, hashset_field(&[1, 2, 3])); assert_eq!( - wi_base.fast_field_names, - hashset(&["fast1", "fast2", "fast3"]) + wi_base.fast_fields, + hashset_fast(&["fast1", "fast2", "fast3"]) ); assert!(wi_base.field_norms); @@ -698,7 +722,7 @@ mod tests { fn test_warmup_info_simplify() { let mut warmup_info = WarmupInfo { term_dict_fields: hashset_field(&[1]), - fast_field_names: hashset(&["fast1", "fast2"]), + fast_fields: hashset_fast(&["fast1", "fast2"]), field_norms: false, terms_grouped_by_field: hashmap(&[ (1, "term1", false), @@ -713,7 +737,7 @@ mod tests { }; let expected = WarmupInfo { term_dict_fields: hashset_field(&[1]), - fast_field_names: hashset(&["fast1", "fast2"]), + fast_fields: hashset_fast(&["fast1", "fast2"]), field_norms: false, terms_grouped_by_field: hashmap(&[(1, "term2", true), (2, "term3", false)]), term_ranges_grouped_by_field: hashmap_ranges(&[ diff --git a/quickwit/quickwit-doc-mapper/src/lib.rs b/quickwit/quickwit-doc-mapper/src/lib.rs index c592616e86a..9c940076a5a 100644 --- a/quickwit/quickwit-doc-mapper/src/lib.rs +++ b/quickwit/quickwit-doc-mapper/src/lib.rs @@ -35,9 +35,9 @@ mod routing_expression; pub mod tag_pruning; pub use doc_mapper::{ - analyze_text, BinaryFormat, DocMapper, DocMapperBuilder, FieldMappingEntry, FieldMappingType, - JsonObject, NamedField, QuickwitBytesOptions, QuickwitJsonOptions, TermRange, TokenizerConfig, - TokenizerEntry, WarmupInfo, + analyze_text, BinaryFormat, DocMapper, DocMapperBuilder, FastFieldWarmupInfo, + FieldMappingEntry, FieldMappingType, JsonObject, NamedField, QuickwitBytesOptions, + QuickwitJsonOptions, TermRange, TokenizerConfig, TokenizerEntry, WarmupInfo, }; use doc_mapper::{ FastFieldOptions, FieldMappingEntryForSerialization, IndexRecordOptionSchema, diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs index 7443fda3273..ed8e908a4b1 100644 --- a/quickwit/quickwit-doc-mapper/src/query_builder.rs +++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs @@ -30,7 +30,9 @@ use quickwit_query::{find_field_or_hit_dynamic, InvalidQuery}; use tantivy::query::Query; use tantivy::schema::{Field, Schema}; use tantivy::Term; +use tracing::error; +use crate::doc_mapper::FastFieldWarmupInfo; use crate::{QueryParserError, TermRange, WarmupInfo}; #[derive(Default)] @@ -48,22 +50,37 @@ impl<'a> QueryAstVisitor<'a> for RangeQueryFields { } } -#[derive(Default)] -struct ExistsQueryFields { - exists_query_field_names: HashSet, +struct ExistsQueryFastFields { + fields: HashSet, + schema: Schema, } -impl<'a> QueryAstVisitor<'a> for ExistsQueryFields { +impl<'a> QueryAstVisitor<'a> for ExistsQueryFastFields { type Err = Infallible; fn visit_exists(&mut self, exists_query: &'a FieldPresenceQuery) -> Result<(), Infallible> { - // If the field is a fast field, we will rely on the `ColumnIndex`. - // If the field is not a fast, we will rely on the field presence field. - // - // After all field names are collected they are checked against schema and - // non-fast fields are removed from warmup operation. - self.exists_query_field_names - .insert(exists_query.field.to_string()); + let fields = exists_query.find_field_and_subfields(&self.schema); + for (_, field_entry, path) in fields { + if field_entry.is_fast() { + if field_entry.field_type().is_json() { + let full_path = format!("{}.{}", field_entry.name(), path); + self.fields.insert(FastFieldWarmupInfo { + name: full_path, + with_subfields: true, + }); + } else if path.is_empty() { + self.fields.insert(FastFieldWarmupInfo { + name: field_entry.name().to_string(), + with_subfields: false, + }); + } else { + error!( + field_entry = field_entry.name(), + path, "only JSON type supports subfields" + ); + } + } + } Ok(()) } } @@ -80,18 +97,24 @@ pub(crate) fn build_query( // This cannot fail. The error type is Infallible. let _: Result<(), Infallible> = range_query_fields.visit(query_ast); - let mut exists_query_fields = ExistsQueryFields::default(); + let mut exists_query_fields = ExistsQueryFastFields { + fields: HashSet::new(), + schema: schema.clone(), + }; // This cannot fail. The error type is Infallible. let _: Result<(), Infallible> = exists_query_fields.visit(query_ast); let mut fast_field_names = HashSet::new(); - fast_field_names.extend(range_query_fields.range_query_field_names); - fast_field_names.extend( - exists_query_fields - .exists_query_field_names + let range_query_fast_fields = + range_query_fields + .range_query_field_names .into_iter() - .filter(|field| is_fast_field(&schema, field)), - ); + .map(|name| FastFieldWarmupInfo { + name, + with_subfields: false, + }); + fast_field_names.extend(range_query_fast_fields); + fast_field_names.extend(exists_query_fields.fields.into_iter()); let query = query_ast.build_tantivy_query( &schema, @@ -118,20 +141,13 @@ pub(crate) fn build_query( term_dict_fields: term_set_query_fields, terms_grouped_by_field, term_ranges_grouped_by_field, - fast_field_names, + fast_fields: fast_field_names, ..WarmupInfo::default() }; Ok((query, warmup_info)) } -fn is_fast_field(schema: &Schema, field_name: &str) -> bool { - if let Ok((_field, field_entry, _path)) = find_field_or_hit_dynamic(field_name, schema) { - return field_entry.is_fast(); - } - false -} - struct ExtractTermSetFields<'a> { term_dict_fields_to_warm_up: HashSet, schema: &'a Schema, @@ -151,7 +167,8 @@ impl<'a> QueryAstVisitor<'a> for ExtractTermSetFields<'_> { fn visit_term_set(&mut self, term_set_query: &'a TermSetQuery) -> anyhow::Result<()> { for field in term_set_query.terms_per_field.keys() { - if let Ok((field, _field_entry, _path)) = find_field_or_hit_dynamic(field, self.schema) + if let Some((field, _field_entry, _path)) = + find_field_or_hit_dynamic(field, self.schema) { self.term_dict_fields_to_warm_up.insert(field); } else { @@ -574,20 +591,41 @@ mod test { check_build_query_static_mode( "ip:*", Vec::new(), - TestExpectation::Ok("ExistsQuery { field_name: \"ip\" }"), + TestExpectation::Ok("ExistsQuery { field_name: \"ip\", json_subpaths: true }"), ); - check_build_query_static_mode( "json_text:*", Vec::new(), TestExpectation::Ok("TermQuery(Term(field=0, type=U64"), ); - check_build_query_static_mode( "json_fast:*", Vec::new(), - TestExpectation::Ok("ExistsQuery { field_name: \"json_fast\" }"), + TestExpectation::Ok("ExistsQuery { field_name: \"json_fast\", json_subpaths: true }"), + ); + check_build_query_static_mode( + "foo:*", + Vec::new(), + TestExpectation::Err("invalid query: field does not exist: `foo`"), ); + check_build_query_static_mode( + "server:*", + Vec::new(), + TestExpectation::Ok("?????????????????"), + ); + + // V currently working V + // check_build_query_static_mode( + // "server:*", + // Vec::new(), + // TestExpectation::Err("invalid query: field does not exist: `server`"), + // ); + // check_build_query_dynamic_mode( + // "server:*", + // Vec::new(), + // // dynamic_mapping is set to TEXT here, this would be an ExistsQuery if it was FAST + // TestExpectation::Ok("TermQuery(Term(field=0, type=U64"), + // ); } #[test] diff --git a/quickwit/quickwit-query/src/query_ast/field_presence.rs b/quickwit/quickwit-query/src/query_ast/field_presence.rs index df82ab3591a..220618b869a 100644 --- a/quickwit/quickwit-query/src/query_ast/field_presence.rs +++ b/quickwit/quickwit-query/src/query_ast/field_presence.rs @@ -20,13 +20,15 @@ use quickwit_common::shared_consts::FIELD_PRESENCE_FIELD_NAME; use quickwit_common::PathHasher; use serde::{Deserialize, Serialize}; -use tantivy::schema::{Field, IndexRecordOption, Schema as TantivySchema}; +use tantivy::schema::{Field, FieldEntry, IndexRecordOption, Schema as TantivySchema}; use tantivy::Term; +use super::tantivy_query_ast::TantivyBoolQuery; +use super::utils::{find_subfields, DYNAMIC_FIELD_NAME}; use crate::query_ast::tantivy_query_ast::TantivyQueryAst; use crate::query_ast::{BuildTantivyAst, QueryAst}; use crate::tokenizers::TokenizerManager; -use crate::{find_field_or_hit_dynamic, InvalidQuery}; +use crate::{find_field_or_hit_dynamic, BooleanOperand, InvalidQuery}; #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] pub struct FieldPresenceQuery { @@ -69,6 +71,51 @@ fn compute_field_presence_hash(field: Field, field_path: &str) -> u64 { path_hasher.finish() } +fn build_leaf_existence_query( + field_presence_field: Field, + leaf_field: Field, + field_entry: &FieldEntry, + path: &str, +) -> TantivyQueryAst { + if field_entry.is_fast() { + let full_path = if path.is_empty() { + field_entry.name().to_string() + } else { + format!("{}.{}", field_entry.name(), path) + }; + let exists_query = tantivy::query::ExistsQuery::new(full_path, true); + TantivyQueryAst::from(exists_query) + } else { + // fallback to the presence field + let field_presence_hash = compute_field_presence_hash(leaf_field, path); + let field_presence_term: Term = + Term::from_field_u64(field_presence_field, field_presence_hash); + let field_presence_term_query = + tantivy::query::TermQuery::new(field_presence_term, IndexRecordOption::Basic); + TantivyQueryAst::from(field_presence_term_query) + } +} + +impl FieldPresenceQuery { + /// Identify the field and potential subfields that are required for this query. + pub fn find_field_and_subfields<'a>( + &'a self, + schema: &'a TantivySchema, + ) -> Vec<(Field, &'a FieldEntry, &'a str)> { + let mut fields = Vec::new(); + if let Some((field, entry, path)) = find_field_or_hit_dynamic(&self.field, schema) { + fields.push((field, entry, path)); + }; + // if `self.field` was not found, it might still be an `object` field + if fields.is_empty() || fields[0].1.name() == DYNAMIC_FIELD_NAME { + for (field, entry) in find_subfields(&self.field, schema) { + fields.push((field, entry, &"")); + } + } + fields + } +} + impl BuildTantivyAst for FieldPresenceQuery { fn build_tantivy_ast_impl( &self, @@ -80,24 +127,23 @@ impl BuildTantivyAst for FieldPresenceQuery { let field_presence_field = schema.get_field(FIELD_PRESENCE_FIELD_NAME).map_err(|_| { InvalidQuery::SchemaError("field presence is not available for this split".to_string()) })?; - let (field, field_entry, path) = find_field_or_hit_dynamic(&self.field, schema)?; - if field_entry.is_fast() { - let full_path = if path.is_empty() { - field_entry.name().to_string() - } else { - format!("{}.{}", field_entry.name(), path) - }; - let exists_query = tantivy::query::ExistsQuery::new_exists_query(full_path); - Ok(TantivyQueryAst::from(exists_query)) - } else { - // fallback to the presence field - let field_presence_hash = compute_field_presence_hash(field, path); - let field_presence_term: Term = - Term::from_field_u64(field_presence_field, field_presence_hash); - let field_presence_term_query = - tantivy::query::TermQuery::new(field_presence_term, IndexRecordOption::Basic); - Ok(TantivyQueryAst::from(field_presence_term_query)) + let fields = self.find_field_and_subfields(schema); + if fields.is_empty() { + // the schema is not dynamic and no subfields are defined + return Err(InvalidQuery::FieldDoesNotExist { + full_path: self.field.clone(), + }); } + let queries = fields + .into_iter() + .map(|(field, entry, path)| { + build_leaf_existence_query(field_presence_field, field, entry, path) + }) + .collect(); + Ok(TantivyQueryAst::Bool(TantivyBoolQuery::build_clause( + BooleanOperand::Or, + queries, + ))) } } diff --git a/quickwit/quickwit-query/src/query_ast/full_text_query.rs b/quickwit/quickwit-query/src/query_ast/full_text_query.rs index 661bb89039f..55efd9b7131 100644 --- a/quickwit/quickwit-query/src/query_ast/full_text_query.rs +++ b/quickwit/quickwit-query/src/query_ast/full_text_query.rs @@ -271,8 +271,7 @@ impl FullTextQuery { return None; }; - let (field, field_entry, json_path) = - find_field_or_hit_dynamic(&self.field, schema).ok()?; + let (field, field_entry, json_path) = find_field_or_hit_dynamic(&self.field, schema)?; let field_type: &FieldType = field_entry.field_type(); match field_type { FieldType::Str(text_options) => { diff --git a/quickwit/quickwit-query/src/query_ast/phrase_prefix_query.rs b/quickwit/quickwit-query/src/query_ast/phrase_prefix_query.rs index 1675b22d760..3181764821d 100644 --- a/quickwit/quickwit-query/src/query_ast/phrase_prefix_query.rs +++ b/quickwit/quickwit-query/src/query_ast/phrase_prefix_query.rs @@ -48,7 +48,10 @@ impl PhrasePrefixQuery { schema: &TantivySchema, tokenizer_manager: &TokenizerManager, ) -> Result<(Field, Vec<(usize, Term)>), InvalidQuery> { - let (field, field_entry, json_path) = find_field_or_hit_dynamic(&self.field, schema)?; + let (field, field_entry, json_path) = find_field_or_hit_dynamic(&self.field, schema) + .ok_or_else(|| InvalidQuery::FieldDoesNotExist { + full_path: self.field.clone(), + })?; let field_type = field_entry.field_type(); match field_type { diff --git a/quickwit/quickwit-query/src/query_ast/range_query.rs b/quickwit/quickwit-query/src/query_ast/range_query.rs index 52ce26bc306..bca8f00e897 100644 --- a/quickwit/quickwit-query/src/query_ast/range_query.rs +++ b/quickwit/quickwit-query/src/query_ast/range_query.rs @@ -121,7 +121,11 @@ impl BuildTantivyAst for RangeQuery { _with_validation: bool, ) -> Result { let (field, field_entry, json_path) = - super::utils::find_field_or_hit_dynamic(&self.field, schema)?; + super::utils::find_field_or_hit_dynamic(&self.field, schema).ok_or_else(|| { + InvalidQuery::FieldDoesNotExist { + full_path: self.field.clone(), + } + })?; if !field_entry.is_fast() { return Err(InvalidQuery::SchemaError(format!( "range queries are only supported for fast fields. (`{}` is not a fast field)", diff --git a/quickwit/quickwit-query/src/query_ast/utils.rs b/quickwit/quickwit-query/src/query_ast/utils.rs index 310056c0309..7c557b36926 100644 --- a/quickwit/quickwit-query/src/query_ast/utils.rs +++ b/quickwit/quickwit-query/src/query_ast/utils.rs @@ -32,35 +32,45 @@ use crate::tokenizers::TokenizerManager; use crate::InvalidQuery; use crate::MatchAllOrNone::MatchNone as TantivyEmptyQuery; -const DYNAMIC_FIELD_NAME: &str = "_dynamic"; +pub(crate) const DYNAMIC_FIELD_NAME: &str = "_dynamic"; fn make_term_query(term: Term) -> TantivyQueryAst { TantivyTermQuery::new(term, IndexRecordOption::WithFreqs).into() } +/// Find the field or fallback to the dynamic field if it exists pub fn find_field_or_hit_dynamic<'a>( full_path: &'a str, schema: &'a TantivySchema, -) -> Result<(Field, &'a FieldEntry, &'a str), InvalidQuery> { +) -> Option<(Field, &'a FieldEntry, &'a str)> { let (field, path) = if let Some((field, path)) = schema.find_field(full_path) { (field, path) } else { - let dynamic_field = - schema - .get_field(DYNAMIC_FIELD_NAME) - .map_err(|_| InvalidQuery::FieldDoesNotExist { - full_path: full_path.to_string(), - })?; + let dynamic_field = schema.get_field(DYNAMIC_FIELD_NAME).ok()?; (dynamic_field, full_path) }; let field_entry = schema.get_field_entry(field); let typ = field_entry.field_type().value_type(); if !path.is_empty() && typ != Type::Json { - return Err(InvalidQuery::FieldDoesNotExist { - full_path: full_path.to_string(), - }); + return None; } - Ok((field, field_entry, path)) + Some((field, field_entry, path)) +} + +/// Find all the fields that are below the given path. +/// +/// This will return a list of fields only when the path is that of a composite +/// type in the doc mapping. +pub fn find_subfields<'a>( + path: &'a str, + schema: &'a TantivySchema, +) -> Vec<(Field, &'a FieldEntry)> { + let prefix = format!("{}.", path); + schema + .fields() + .into_iter() + .filter(|(_, field_entry)| field_entry.name().starts_with(&prefix)) + .collect() } /// Creates a full text query. @@ -74,12 +84,14 @@ pub(crate) fn full_text_query( tokenizer_manager: &TokenizerManager, lenient: bool, ) -> Result { - let (field, field_entry, path) = match find_field_or_hit_dynamic(full_path, schema) { - Ok(res) => res, - Err(InvalidQuery::FieldDoesNotExist { .. }) if lenient => { - return Ok(TantivyEmptyQuery.into()) + let Some((field, field_entry, path)) = find_field_or_hit_dynamic(full_path, schema) else { + if lenient { + return Ok(TantivyEmptyQuery.into()); + } else { + return Err(InvalidQuery::FieldDoesNotExist { + full_path: full_path.to_string(), + }); } - Err(e) => return Err(e), }; compute_query_with_field( field, diff --git a/quickwit/quickwit-query/src/query_ast/wildcard_query.rs b/quickwit/quickwit-query/src/query_ast/wildcard_query.rs index 145e5a45bd1..81a44932f22 100644 --- a/quickwit/quickwit-query/src/query_ast/wildcard_query.rs +++ b/quickwit/quickwit-query/src/query_ast/wildcard_query.rs @@ -110,7 +110,12 @@ impl WildcardQuery { schema: &TantivySchema, tokenizer_manager: &TokenizerManager, ) -> Result<(Field, Term), InvalidQuery> { - let (field, field_entry, json_path) = find_field_or_hit_dynamic(&self.field, schema)?; + let Some((field, field_entry, json_path)) = find_field_or_hit_dynamic(&self.field, schema) + else { + return Err(InvalidQuery::FieldDoesNotExist { + full_path: self.field.clone(), + }); + }; let field_type = field_entry.field_type(); let prefix = unescape_with_final_wildcard(&self.value)?; diff --git a/quickwit/quickwit-search/src/collector.rs b/quickwit/quickwit-search/src/collector.rs index 67beb8090cb..454946f6fe8 100644 --- a/quickwit/quickwit-search/src/collector.rs +++ b/quickwit/quickwit-search/src/collector.rs @@ -23,7 +23,7 @@ use std::collections::HashSet; use itertools::Itertools; use quickwit_common::binary_heap::{SortKeyMapper, TopK}; -use quickwit_doc_mapper::WarmupInfo; +use quickwit_doc_mapper::{FastFieldWarmupInfo, WarmupInfo}; use quickwit_proto::search::{ LeafSearchResponse, PartialHit, ResourceStats, SearchRequest, SortByValue, SortOrder, SortValue, SplitSearchError, @@ -753,7 +753,14 @@ impl QuickwitCollector { pub fn warmup_info(&self) -> WarmupInfo { WarmupInfo { - fast_field_names: self.fast_field_names(), + fast_fields: self + .fast_field_names() + .into_iter() + .map(|name| FastFieldWarmupInfo { + name, + with_subfields: false, + }) + .collect(), field_norms: self.requires_scoring(), ..WarmupInfo::default() } diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 236149ca038..7242e62c409 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -29,7 +29,7 @@ use bytesize::ByteSize; use futures::future::try_join_all; use quickwit_common::pretty::PrettySample; use quickwit_directories::{CachingDirectory, HotDirectory, StorageDirectory}; -use quickwit_doc_mapper::{DocMapper, TermRange, WarmupInfo}; +use quickwit_doc_mapper::{DocMapper, FastFieldWarmupInfo, TermRange, WarmupInfo}; use quickwit_proto::search::{ CountHits, LeafSearchRequest, LeafSearchResponse, PartialHit, ResourceStats, SearchRequest, SortOrder, SortValue, SplitIdAndFooterOffsets, SplitSearchError, @@ -219,7 +219,7 @@ pub(crate) async fn warmup(searcher: &Searcher, warmup_info: &WarmupInfo) -> any let warm_up_term_dict_future = warm_up_term_dict_fields(searcher, &warmup_info.term_dict_fields) .instrument(debug_span!("warm_up_term_dicts")); - let warm_up_fastfields_future = warm_up_fastfields(searcher, &warmup_info.fast_field_names) + let warm_up_fastfields_future = warm_up_fastfields(searcher, &warmup_info.fast_fields) .instrument(debug_span!("warm_up_fastfields")); let warm_up_fieldnorms_future = warm_up_fieldnorms(searcher, warmup_info.field_norms) .instrument(debug_span!("warm_up_fieldnorms")); @@ -271,11 +271,17 @@ async fn warm_up_postings(searcher: &Searcher, fields: &HashSet) -> anyho async fn warm_up_fastfield( fast_field_reader: &FastFieldReaders, - fast_field_name: &str, + fast_field: &FastFieldWarmupInfo, ) -> anyhow::Result<()> { - let columns = fast_field_reader - .list_dynamic_column_handles(fast_field_name) + let mut columns = fast_field_reader + .list_dynamic_column_handles(&fast_field.name) .await?; + if fast_field.with_subfields { + let subpath_columns = fast_field_reader + .list_subpath_dynamic_column_handles(&fast_field.name) + .await?; + columns.extend(subpath_columns); + } futures::future::try_join_all( columns .into_iter() @@ -289,13 +295,13 @@ async fn warm_up_fastfield( /// all of the fast fields passed as argument. async fn warm_up_fastfields( searcher: &Searcher, - fast_field_names: &HashSet, + fast_fields: &HashSet, ) -> anyhow::Result<()> { let mut warm_up_futures = Vec::new(); for segment_reader in searcher.segment_readers() { let fast_field_reader = segment_reader.fast_fields(); - for fast_field_name in fast_field_names { - let warm_up_fut = warm_up_fastfield(fast_field_reader, fast_field_name); + for fast_field in fast_fields { + let warm_up_fut = warm_up_fastfield(fast_field_reader, fast_field); warm_up_futures.push(Box::pin(warm_up_fut)); } } diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index 724687148f2..d9debcc9cbd 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -1339,7 +1339,7 @@ pub async fn search_plan( searched_splits: split_ids, storage_requests: StorageRequestCount { footer: 1, - fastfield: warmup_info.fast_field_names.len(), + fastfield: warmup_info.fast_fields.len(), fieldnorm: fieldnorm_query_count, sstable: sstable_query_count, posting: sstable_query_count, diff --git a/quickwit/quickwit-search/src/search_stream/leaf.rs b/quickwit/quickwit-search/src/search_stream/leaf.rs index 0659965b40d..a1728a889a1 100644 --- a/quickwit/quickwit-search/src/search_stream/leaf.rs +++ b/quickwit/quickwit-search/src/search_stream/leaf.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use futures::{FutureExt, StreamExt}; use quickwit_common::pretty::PrettySample; -use quickwit_doc_mapper::DocMapper; +use quickwit_doc_mapper::{DocMapper, FastFieldWarmupInfo, WarmupInfo}; use quickwit_proto::search::{ LeafSearchStreamResponse, OutputFormat, SearchRequest, SearchStreamRequest, SplitIdAndFooterOffsets, @@ -181,12 +181,21 @@ async fn leaf_search_stream_single_split( .iter() .any(|sort| sort.field_name == "_score"); - // TODO no test fail if this line get removed - warmup_info.field_norms |= requires_scoring; - - let fast_field_names = - request_fields.fast_fields_for_request(timestamp_filter_builder_opt.as_ref()); - warmup_info.fast_field_names.extend(fast_field_names); + let fast_fields = request_fields + .fast_fields_for_request(timestamp_filter_builder_opt.as_ref()) + .into_iter() + .map(|name| FastFieldWarmupInfo { + name, + with_subfields: false, + }) + .collect(); + let stream_warmup_info = WarmupInfo { + fast_fields, + // TODO no test fail if this line get removed + field_norms: requires_scoring, + ..Default::default() + }; + warmup_info.merge(stream_warmup_info); warmup_info.simplify(); warmup(&searcher, &warmup_info).await?; diff --git a/quickwit/rest-api-tests/scenarii/qw_search_api/0004_exists_search.yaml b/quickwit/rest-api-tests/scenarii/qw_search_api/0004_exists_search.yaml index 35739d79a34..808d0fbf95a 100644 --- a/quickwit/rest-api-tests/scenarii/qw_search_api/0004_exists_search.yaml +++ b/quickwit/rest-api-tests/scenarii/qw_search_api/0004_exists_search.yaml @@ -1,60 +1,65 @@ - -endpoint: json/search +endpoint: nested/search params: - query: "json_fast:*" -expected: - num_hits: 1 ---- -endpoint: json/search -params: - query: "json_text:*" + query: "doesnotexist:*" expected: - num_hits: 2 ---- -endpoint: json/search -params: - query: "json_text.field_a:*" -expected: - num_hits: 2 + num_hits: 0 --- -endpoint: json/search +endpoint: nested/search params: - query: "json_text.field_b:*" + query: "json_fast:*" expected: num_hits: 1 --- -endpoint: json/search +endpoint: nested/search params: - query: "json_fast.fast_field_a:*" + query: "json_fast.field_c:*" expected: num_hits: 1 --- -endpoint: json/search +endpoint: nested/search params: query: "json_fast.doesnotexist:*" expected: num_hits: 0 --- -endpoint: json/search +endpoint: nested/search params: query: "object_multi:*" expected: num_hits: 3 --- -endpoint: json/search +endpoint: nested/search params: query: "object_multi.object_text_field:*" expected: num_hits: 1 --- -endpoint: json/search +endpoint: nested/search params: query: "object_multi.object_fast_field:*" expected: num_hits: 2 --- -endpoint: json/search +endpoint: nested/search params: query: "object_multi.doesnotexist:*" expected: num_hits: 0 +--- +endpoint: nested/search +params: + query: "json_text:*" +expected: + num_hits: 2 +--- +endpoint: nested/search +params: + query: "json_text.field_a:*" +expected: + num_hits: 2 +--- +endpoint: nested/search +params: + query: "json_text.field_b:*" +expected: + num_hits: 1 diff --git a/quickwit/rest-api-tests/scenarii/qw_search_api/_setup.quickwit.yaml b/quickwit/rest-api-tests/scenarii/qw_search_api/_setup.quickwit.yaml index 9c5542af5fe..d9b34e5be97 100644 --- a/quickwit/rest-api-tests/scenarii/qw_search_api/_setup.quickwit.yaml +++ b/quickwit/rest-api-tests/scenarii/qw_search_api/_setup.quickwit.yaml @@ -84,17 +84,24 @@ params: ndjson: - {"seq": 4, "tag": 1} --- +method: DELETE +endpoint: indexes/nested +status_code: null +--- method: POST endpoint: indexes/ json: version: "0.7" - index_id: json + index_id: nested doc_mapping: + # default mode is dynamic field_mappings: - name: json_text type: json + indexed: true - name: json_fast type: json + stored: true fast: true - name: object_multi type: object @@ -107,7 +114,7 @@ json: --- method: POST -endpoint: tagged/ingest +endpoint: nested/ingest params: commit: force ndjson: