remove path truncation logic, add assertion

PSeitz committed Nov 8, 2023
1 parent 0fcdbe4 commit 9a1b756
Showing 4 changed files with 41 additions and 44 deletions.
36 changes: 13 additions & 23 deletions src/core/json_utils.rs
@@ -247,10 +247,17 @@ fn index_json_value<'a, V: Value<'a>>(

 /// Tries to infer a JSON type from a string and append it to the term.
 ///
-/// Disclaimer
-/// The term is expected to not contain the type byte nor the value.
-pub(crate) fn convert_to_fast_value_and_append(term: &Term, phrase: &str) -> Option<Term> {
-    let mut term = term.clone();
+/// The term must be json + JSON path.
+pub(crate) fn convert_to_fast_value_and_append(mut term: Term, phrase: &str) -> Option<Term> {
+    assert_eq!(
+        term.value()
+            .as_json_value_bytes()
+            .expect("expecting a Term with a json type and json path")
+            .as_serialized()
+            .len(),
+        0,
+        "JSON value bytes should be empty"
+    );
     if let Ok(dt) = OffsetDateTime::parse(phrase, &Rfc3339) {
         let dt_utc = dt.to_offset(UtcOffset::UTC);
         term.append_type_and_fast_value(DateTime::from_utc(dt_utc));
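
A minimal usage sketch (not part of the commit): the caller now hands the Term over by value, and the Term must carry only the JSON type byte and the path. The helpers are crate-internal, the `attributes.count` path is made up, and the integer branch sits in the collapsed part of this hunk, so treat this as illustrative rather than compilable as-is.

    let field = Field::from_field_id(1);
    // Path-only term: no value bytes appended yet, so the new assertion passes.
    let term = term_from_json_paths(field, ["attributes", "count"].into_iter(), false);
    // "42" should parse as an integer fast value; a phrase that is neither a
    // date, a number, nor a bool is expected to yield None instead.
    assert!(convert_to_fast_value_and_append(term, "42").is_some());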
@@ -275,23 +275,6 @@ pub(crate) fn convert_to_fast_value_and_append(term: &Term, phrase: &str) -> Option<Term> {
     None
 }
 
-/// helper function to generate a list of terms with their positions from a textual json value
-pub(crate) fn append_string_and_get_terms(
-    term: &mut Term,
-    value: &str,
-    text_analyzer: &mut TextAnalyzer,
-) -> Vec<(usize, Term)> {
-    let mut positions_and_terms = Vec::<(usize, Term)>::new();
-    let term_num_bytes = term.len_bytes();
-    let mut token_stream = text_analyzer.token_stream(value);
-    token_stream.process(&mut |token| {
-        term.truncate_value_bytes(term_num_bytes);
-        term.append_str(&token.text);
-        positions_and_terms.push((token.position, term.clone()));
-    });
-    positions_and_terms
-}
-
 /// Splits a json path supplied to the query parser in such a way that
 /// `.` can be escaped.
 ///
@@ -374,7 +364,7 @@ mod tests {
         let field = Field::from_field_id(1);
 
         let mut term = term_from_json_paths(field, ["attributes", "color"].into_iter(), false);
-        term.append_str("red");
+        term.append_type_and_str("red");
         assert_eq!(
             format!("{:?}", term),
             "Term(field=1, type=Json, path=attributes.color, type=Str, \"red\")"
@@ -396,7 +386,7 @@
     fn test_string_term() {
         let field = Field::from_field_id(1);
         let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
-        term.append_str("red");
+        term.append_type_and_str("red");
 
         assert_eq!(term.serialized_term(), b"\x00\x00\x00\x01jcolor\x00sred")
     }
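
The expected byte string in `test_string_term` makes the term layout visible. A standalone sketch decoding it, with the layout inferred from this test rather than from documentation:

    fn main() {
        let serialized: &[u8] = b"\x00\x00\x00\x01jcolor\x00sred";
        assert_eq!(&serialized[..4], &1u32.to_be_bytes()); // field id = 1
        assert_eq!(serialized[4], b'j'); // JSON type code
        assert_eq!(&serialized[5..10], b"color"); // the JSON path
        assert_eq!(serialized[10], 0u8); // end-of-path marker
        assert_eq!(serialized[11], b's'); // Str type code written by append_type_and_str
        assert_eq!(&serialized[12..], b"red"); // the value bytes
        println!("layout verified");
    }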
12 changes: 6 additions & 6 deletions src/indexer/segment_writer.rs
@@ -655,7 +655,7 @@ mod tests {
         term
     }
     fn set_str(val: &str, mut term: Term) -> Term {
-        term.append_str(val);
+        term.append_type_and_str(val);
         term
     }
 
@@ -776,7 +776,7 @@
         let segment_reader = searcher.segment_reader(0u32);
         let inv_index = segment_reader.inverted_index(json_field).unwrap();
         let mut term = term_from_json_paths(json_field, ["mykey"].into_iter(), false);
-        term.append_str("token");
+        term.append_type_and_str("token");
         let term_info = inv_index.get_term_info(&term).unwrap().unwrap();
         assert_eq!(
             term_info,
@@ -815,7 +815,7 @@
         let segment_reader = searcher.segment_reader(0u32);
         let inv_index = segment_reader.inverted_index(json_field).unwrap();
         let mut term = term_from_json_paths(json_field, ["mykey"].into_iter(), false);
-        term.append_str("two tokens");
+        term.append_type_and_str("two tokens");
         let term_info = inv_index.get_term_info(&term).unwrap().unwrap();
         assert_eq!(
             term_info,
@@ -858,13 +858,13 @@
         let term = term_from_json_paths(json_field, ["mykey", "field"].into_iter(), false);
 
         let mut hello_term = term.clone();
-        hello_term.append_str("hello");
+        hello_term.append_type_and_str("hello");
 
         let mut nothello_term = term.clone();
-        nothello_term.append_str("nothello");
+        nothello_term.append_type_and_str("nothello");
 
         let mut happy_term = term.clone();
-        happy_term.append_str("happy");
+        happy_term.append_type_and_str("happy");
 
         let phrase_query = PhraseQuery::new(vec![hello_term, happy_term.clone()]);
         assert_eq!(searcher.search(&phrase_query, &Count).unwrap(), 1);
35 changes: 21 additions & 14 deletions src/query/query_parser/query_parser.rs
@@ -11,10 +11,7 @@ use rustc_hash::FxHashMap;

 use super::logical_ast::*;
 use crate::core::Index;
-use crate::json_utils::{
-    append_string_and_get_terms, convert_to_fast_value_and_append, split_json_path,
-    term_from_json_paths,
-};
+use crate::json_utils::{convert_to_fast_value_and_append, split_json_path, term_from_json_paths};
 use crate::query::range_query::{is_type_valid_for_fastfield_range_query, RangeQuery};
 use crate::query::{
     AllQuery, BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhrasePrefixQuery,
@@ -968,19 +965,29 @@ fn generate_literals_for_json_object(
     let mut logical_literals = Vec::new();
 
     let paths = split_json_path(json_path);
-    let mut term = term_from_json_paths(
-        field,
-        paths.iter().map(|el| el.as_str()),
-        json_options.is_expand_dots_enabled(),
-    );
+    let get_term_with_path = || {
+        term_from_json_paths(
+            field,
+            paths.iter().map(|el| el.as_str()),
+            json_options.is_expand_dots_enabled(),
+        )
+    };
 
-    if let Some(term) = convert_to_fast_value_and_append(&term, phrase) {
+    if let Some(term) = convert_to_fast_value_and_append(get_term_with_path(), phrase) {
         logical_literals.push(LogicalLiteral::Term(term));
     }
-    let terms = append_string_and_get_terms(&mut term, phrase, &mut text_analyzer);
 
-    if terms.len() <= 1 {
-        for (_, term) in terms {
+    // Try to tokenize the phrase and create Terms.
+    let mut positions_and_terms = Vec::<(usize, Term)>::new();
+    let mut token_stream = text_analyzer.token_stream(phrase);
+    token_stream.process(&mut |token| {
+        let mut term = get_term_with_path();
+        term.append_type_and_str(&token.text);
+        positions_and_terms.push((token.position, term.clone()));
+    });
+
+    if positions_and_terms.len() <= 1 {
+        for (_, term) in positions_and_terms {
             logical_literals.push(LogicalLiteral::Term(term));
         }
         return Ok(logical_literals);
@@ -991,7 +998,7 @@
         ));
     }
     logical_literals.push(LogicalLiteral::Phrase {
-        terms,
+        terms: positions_and_terms,
         slop: 0,
         prefix: false,
     });
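
For contrast, a self-contained illustration (hypothetical names, plain Strings instead of tantivy's Term) of the pattern this file switched to: the deleted helper reused one shared buffer and truncated it back to the path prefix between tokens, while the new code asks a factory closure for a fresh path-only value per token.

    fn tokens_old_style(base: &mut String, tokens: &[&str]) -> Vec<String> {
        let prefix_len = base.len();
        tokens
            .iter()
            .map(|t| {
                base.truncate(prefix_len); // rewind the shared buffer (old approach)
                base.push_str(t);
                base.clone()
            })
            .collect()
    }

    fn tokens_new_style(make_base: impl Fn() -> String, tokens: &[&str]) -> Vec<String> {
        tokens
            .iter()
            .map(|t| {
                let mut term = make_base(); // fresh value per token (new approach)
                term.push_str(t);
                term
            })
            .collect()
    }

    fn main() {
        let mut shared = String::from("attributes.color:");
        let old = tokens_old_style(&mut shared, &["red", "blue"]);
        let new = tokens_new_style(|| String::from("attributes.color:"), &["red", "blue"]);
        assert_eq!(old, new); // same output, without the truncation bookkeeping
    }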
2 changes: 1 addition & 1 deletion src/schema/term.rs
@@ -182,7 +182,7 @@ impl Term {
     /// This is used in JSON type to append a str after the path.
     ///
     /// It will not clear existing bytes.
-    pub(crate) fn append_str(&mut self, val: &str) {
+    pub(crate) fn append_type_and_str(&mut self, val: &str) {
         self.0.push(Type::Str.to_code());
         self.0.extend(val.as_bytes().as_ref());
     }
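
Because the renamed method only pushes bytes and never clears, calling it twice concatenates two typed payloads; that non-clearing behavior is what the new assertion in src/core/json_utils.rs guards against. A minimal mimic over a plain Vec<u8> (hypothetical; the Str type code is assumed to be b's', as the test_string_term bytes shown earlier suggest):

    fn append_type_and_str(buf: &mut Vec<u8>, val: &str) {
        buf.push(b's'); // the type code first...
        buf.extend_from_slice(val.as_bytes()); // ...then the raw UTF-8 bytes
    }

    fn main() {
        let mut buf = b"\x00\x00\x00\x01jcolor\x00".to_vec(); // field id + JSON path
        append_type_and_str(&mut buf, "red");
        assert_eq!(buf, b"\x00\x00\x00\x01jcolor\x00sred");
        append_type_and_str(&mut buf, "red"); // no clearing: bytes keep accumulating
        assert_eq!(buf, b"\x00\x00\x00\x01jcolor\x00sredsred");
    }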
