Skip to content

Commit

Permalink
support escaped dot, add agg test (#2250)
Browse files Browse the repository at this point in the history
add agg test for nested JSON
allow escaping of dot
  • Loading branch information
PSeitz authored Nov 20, 2023
1 parent 47009ed commit 054f49d
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 1 deletion.
59 changes: 59 additions & 0 deletions src/aggregation/agg_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,65 @@ fn test_aggregation_on_json_object() {
);
}

#[test]
fn test_aggregation_on_nested_json_object() {
let mut schema_builder = Schema::builder();
let json = schema_builder.add_json_field("json.blub", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer: IndexWriter = index.writer_for_tests().unwrap();
index_writer
.add_document(doc!(json => json!({"color.dot": "red", "color": {"nested":"red"} })))
.unwrap();
index_writer
.add_document(doc!(json => json!({"color.dot": "blue", "color": {"nested":"blue"} })))
.unwrap();
index_writer.commit().unwrap();
let reader = index.reader().unwrap();
let searcher = reader.searcher();

let agg: Aggregations = serde_json::from_value(json!({
"jsonagg1": {
"terms": {
"field": "json\\.blub.color\\.dot",
}
},
"jsonagg2": {
"terms": {
"field": "json\\.blub.color.nested",
}
}

}))
.unwrap();

let aggregation_collector = get_collector(agg);
let aggregation_results = searcher.search(&AllQuery, &aggregation_collector).unwrap();
let aggregation_res_json = serde_json::to_value(aggregation_results).unwrap();
assert_eq!(
&aggregation_res_json,
&serde_json::json!({
"jsonagg1": {
"buckets": [
{"doc_count": 1, "key": "blue"},
{"doc_count": 1, "key": "red"}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
},
"jsonagg2": {
"buckets": [
{"doc_count": 1, "key": "blue"},
{"doc_count": 1, "key": "red"}
],
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0
}

})
);
}

#[test]
fn test_aggregation_on_json_object_empty_columns() {
let mut schema_builder = Schema::builder();
Expand Down
2 changes: 1 addition & 1 deletion src/core/json_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ pub struct JsonTermWriter<'a> {
/// In other words,
/// - `k8s.node` ends up as `["k8s", "node"]`.
/// - `k8s\.node` ends up as `["k8s.node"]`.
fn split_json_path(json_path: &str) -> Vec<String> {
pub fn split_json_path(json_path: &str) -> Vec<String> {
let mut escaped_state: bool = false;
let mut json_path_segments = Vec::new();
let mut buffer = String::new();
Expand Down
7 changes: 7 additions & 0 deletions src/fastfield/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1288,11 +1288,18 @@ mod tests {
index_writer.commit().unwrap();
let searcher = index.reader().unwrap().searcher();
let fast_field_reader = searcher.segment_reader(0u32).fast_fields();
// Supported for now, maybe dropped in the future.
let column = fast_field_reader
.column_opt::<i64>("jsonfield.attr.age")
.unwrap()
.unwrap();
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
assert_eq!(&vals, &[33]);
let column = fast_field_reader
.column_opt::<i64>("jsonfield\\.attr.age")
.unwrap()
.unwrap();
let vals: Vec<i64> = column.values_for_doc(0u32).collect();
assert_eq!(&vals, &[33]);
}
}
8 changes: 8 additions & 0 deletions src/schema/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer};

use super::ip_options::IpAddrOptions;
use super::*;
use crate::json_utils::split_json_path;
use crate::schema::bytes_options::BytesOptions;
use crate::TantivyError;

Expand Down Expand Up @@ -328,12 +329,19 @@ impl Schema {
if let Some(field) = self.0.fields_map.get(full_path) {
return Some((*field, ""));
}

let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path);
while let Some(pos) = splitting_period_pos.pop() {
let (prefix, suffix) = full_path.split_at(pos);

if let Some(field) = self.0.fields_map.get(prefix) {
return Some((*field, &suffix[1..]));
}
// JSON path may contain a dot, for now we try both variants to find the field.
let prefix = split_json_path(prefix).join(".");
if let Some(field) = self.0.fields_map.get(&prefix) {
return Some((*field, &suffix[1..]));
}
}
None
}
Expand Down

0 comments on commit 054f49d

Please sign in to comment.