Skip to content

Commit

Permalink
Add missing fields to ES compatible Query API (#5098)
Browse files Browse the repository at this point in the history
* Add missing fields to ES compatible Query API

* [_source][1] schema
* [docvalue][2] schema

I've added the additional fields as `serde_json::Value` to avoid complex
struct definitions, since we won't use them anyway.

[1]: https://www.elastic.co/guide/en/elasticsearch/reference/7.17/search-fields.html#source-filtering
[2]: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-fields.html#docvalue-fields

* Add missing multi match modes to ES compatible Query API

According to the [docs][1], multi_match has 6 modes. Best fields and Cross
fields can be converted to Most fields, as they are close in logic and I
couldn't find their implementations.

BoolPrefix was implemented and I added it to the parser.

Tests were extended to cover all cases; for that I slightly changed some
structures and added the missing trait implementations.

[1]: https://opensearch.org/docs/latest/query-dsl/full-text/multi-match/#multi-match-query-types
  • Loading branch information
kuzaxak authored Jun 12, 2024
1 parent 83bb28e commit 379a752
Show file tree
Hide file tree
Showing 5 changed files with 218 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

use serde::Deserialize;

use super::StringOrStructForSerialization;
use super::{ElasticQueryDslInner, StringOrStructForSerialization};
use crate::elastic_query_dsl::match_query::MatchQueryParams;
use crate::elastic_query_dsl::{default_max_expansions, ConvertableToQueryAst};
use crate::query_ast::{FullTextParams, FullTextQuery, QueryAst};
Expand Down Expand Up @@ -52,6 +52,12 @@ impl ConvertableToQueryAst for MatchBoolPrefixQuery {
}
}

impl From<MatchBoolPrefixQuery> for ElasticQueryDslInner {
fn from(match_bool_prefix_query: MatchBoolPrefixQuery) -> Self {
ElasticQueryDslInner::MatchBoolPrefix(match_bool_prefix_query)
}
}

impl From<OneFieldMap<StringOrStructForSerialization<MatchQueryParams>>> for MatchBoolPrefixQuery {
fn from(
match_query_params: OneFieldMap<StringOrStructForSerialization<MatchQueryParams>>,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@

use serde::Deserialize;

use crate::elastic_query_dsl::{ConvertableToQueryAst, StringOrStructForSerialization};
use crate::elastic_query_dsl::{
ConvertableToQueryAst, ElasticQueryDslInner, StringOrStructForSerialization,
};
use crate::query_ast::{FullTextMode, FullTextParams, FullTextQuery, QueryAst};
use crate::{MatchAllOrNone, OneFieldMap};

Expand All @@ -35,13 +37,13 @@ pub(crate) struct MatchPhraseQuery {
#[derive(Clone, Deserialize, PartialEq, Eq, Debug)]
#[serde(deny_unknown_fields)]
pub struct MatchPhraseQueryParams {
query: String,
pub(crate) query: String,
#[serde(default)]
zero_terms_query: MatchAllOrNone,
pub(crate) zero_terms_query: MatchAllOrNone,
#[serde(default)]
analyzer: Option<String>,
pub(crate) analyzer: Option<String>,
#[serde(default)]
slop: u32,
pub(crate) slop: u32,
}

impl ConvertableToQueryAst for MatchPhraseQuery {
Expand All @@ -61,6 +63,12 @@ impl ConvertableToQueryAst for MatchPhraseQuery {
}
}

impl From<MatchPhraseQuery> for ElasticQueryDslInner {
fn from(match_phrase_query: MatchPhraseQuery) -> Self {
ElasticQueryDslInner::MatchPhrase(match_phrase_query)
}
}

impl From<OneFieldMap<StringOrStructForSerialization<MatchPhraseQueryParams>>>
for MatchPhraseQuery
{
Expand Down
169 changes: 167 additions & 2 deletions quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use serde_with::formats::PreferMany;
use serde_with::{serde_as, OneOrMany};

use crate::elastic_query_dsl::bool_query::BoolQuery;
use crate::elastic_query_dsl::match_bool_prefix::MatchBoolPrefixQuery;
use crate::elastic_query_dsl::match_phrase_query::{MatchPhraseQuery, MatchPhraseQueryParams};
use crate::elastic_query_dsl::match_query::{MatchQuery, MatchQueryParams};
use crate::elastic_query_dsl::phrase_prefix_query::{
Expand Down Expand Up @@ -78,7 +79,15 @@ fn deserialize_match_query_for_one_field(
};
Ok(ElasticQueryDslInner::MatchPhrasePrefix(phrase_prefix))
}
MatchType::MostFields => {
MatchType::BoolPrefix => {
let bool_prefix_params: MatchQueryParams = serde_json::from_value(json_val)?;
let bool_prefix = MatchBoolPrefixQuery {
params: bool_prefix_params,
field: field.to_string(),
};
Ok(ElasticQueryDslInner::MatchBoolPrefix(bool_prefix))
}
MatchType::MostFields | MatchType::BestFields | MatchType::CrossFields => {
let match_query_params: MatchQueryParams = serde_json::from_value(json_val)?;
let match_query = MatchQuery {
field: field.to_string(),
Expand Down Expand Up @@ -110,6 +119,7 @@ impl TryFrom<MultiMatchQueryForDeserialization> for MultiMatchQuery {

fn try_from(multi_match_query: MultiMatchQueryForDeserialization) -> Result<Self, Self::Error> {
if multi_match_query.fields.is_empty() {
// TODO: We can use default field from index configuration instead
return Err(serde::de::Error::custom(
"Quickwit does not support multi match query with 0 fields. MultiMatchQueries \
must have at least one field.",
Expand Down Expand Up @@ -139,8 +149,11 @@ impl TryFrom<MultiMatchQueryForDeserialization> for MultiMatchQuery {
/// Matching mode requested through the `type` field of a multi match query.
pub enum MatchType {
/// Default mode: each field is queried with a plain match query.
#[default]
MostFields,
/// Not implemented on its own; handled the same way as `MostFields`.
BestFields,
/// Not implemented on its own; handled the same way as `MostFields`.
CrossFields,
/// Each field is queried with a match phrase query.
Phrase,
/// Each field is queried with a match phrase prefix query.
PhrasePrefix,
/// Each field is queried with a match bool prefix query.
BoolPrefix,
}

impl ConvertableToQueryAst for MultiMatchQuery {
Expand All @@ -151,8 +164,8 @@ impl ConvertableToQueryAst for MultiMatchQuery {

#[cfg(test)]
mod tests {

use super::*;
use crate::elastic_query_dsl::default_max_expansions;

#[track_caller]
fn test_multimatch_query_ok_aux<T: Into<ElasticQueryDslInner>>(json: &str, expected: T) {
Expand Down Expand Up @@ -201,6 +214,158 @@ mod tests {
.into(),
]),
);

test_multimatch_query_ok_aux(
r#"{
"query": "quick brown fox",
"type": "best_fields",
"fields": ["title", "body"]
}"#,
BoolQuery::union(vec![
MatchQuery {
field: "title".to_string(),
params: MatchQueryParams {
query: "quick brown fox".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
},
}
.into(),
MatchQuery {
field: "body".to_string(),
params: MatchQueryParams {
query: "quick brown fox".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
},
}
.into(),
]),
);

test_multimatch_query_ok_aux(
r#"{
"query": "quick brown fox",
"type": "cross_fields",
"fields": ["title", "body"]
}"#,
BoolQuery::union(vec![
MatchQuery {
field: "title".to_string(),
params: MatchQueryParams {
query: "quick brown fox".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
},
}
.into(),
MatchQuery {
field: "body".to_string(),
params: MatchQueryParams {
query: "quick brown fox".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
},
}
.into(),
]),
);

test_multimatch_query_ok_aux(
r#"{
"query": "quick brown fox",
"type": "phrase",
"fields": ["title", "body"]
}"#,
BoolQuery::union(vec![
MatchPhraseQuery {
field: "title".to_string(),
params: MatchPhraseQueryParams {
query: "quick brown fox".to_string(),
zero_terms_query: Default::default(),
analyzer: None,
slop: Default::default(),
},
}
.into(),
MatchPhraseQuery {
field: "body".to_string(),
params: MatchPhraseQueryParams {
query: "quick brown fox".to_string(),
zero_terms_query: Default::default(),
analyzer: None,
slop: Default::default(),
},
}
.into(),
]),
);

test_multimatch_query_ok_aux(
r#"{
"query": "quick brown fox",
"type": "phrase_prefix",
"fields": ["title", "body"]
}"#,
BoolQuery::union(vec![
MatchPhrasePrefixQuery {
field: "title".to_string(),
value: MatchPhrasePrefixQueryParams {
query: "quick brown fox".to_string(),
analyzer: Default::default(),
max_expansions: default_max_expansions(),
slop: Default::default(),
zero_terms_query: Default::default(),
},
}
.into(),
MatchPhrasePrefixQuery {
field: "body".to_string(),
value: MatchPhrasePrefixQueryParams {
query: "quick brown fox".to_string(),
analyzer: Default::default(),
max_expansions: default_max_expansions(),
slop: Default::default(),
zero_terms_query: Default::default(),
},
}
.into(),
]),
);

test_multimatch_query_ok_aux(
r#"{
"query": "quick brown",
"type": "bool_prefix",
"fields": ["title", "body"]
}"#,
BoolQuery::union(vec![
MatchBoolPrefixQuery {
field: "title".to_string(),
params: MatchQueryParams {
query: "quick brown".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
},
}
.into(),
MatchBoolPrefixQuery {
field: "body".to_string(),
params: MatchQueryParams {
query: "quick brown".to_string(),
operator: crate::BooleanOperand::Or,
zero_terms_query: Default::default(),
_lenient: false,
},
}
.into(),
]),
);
}

#[test]
Expand Down
3 changes: 3 additions & 0 deletions quickwit/quickwit-query/src/elastic_query_dsl/range_query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ pub struct RangeQueryParams {
lte: Option<JsonLiteral>,
#[serde(default)]
boost: Option<NotNaNf32>,
#[serde(default)]
format: Option<JsonLiteral>,
}

pub type RangeQuery = OneFieldMap<RangeQueryParams>;
Expand All @@ -53,6 +55,7 @@ impl ConvertableToQueryAst for RangeQuery {
lt,
lte,
boost,
format: _,
} = self.value;
let range_query_ast = crate::query_ast::RangeQuery {
field,
Expand Down
30 changes: 28 additions & 2 deletions quickwit/quickwit-serve/src/elasticsearch_api/model/search_body.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,28 @@ struct FieldSortParams {
pub date_format: Option<ElasticDateFormat>,
}

/// Object form of the `_source` search body parameter, carrying optional
/// include and exclude lists.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct SourceObject {
// Entries to include; `None` when the key is absent.
// NOTE(review): presumably field names or patterns, as in Elasticsearch
// source filtering -- confirm against the consumer of this struct.
includes: Option<Vec<String>>,
// Entries to exclude; `None` when the key is absent.
excludes: Option<Vec<String>>,
}

/// Accepted shapes of the `_source` search body parameter.
///
/// The enum is untagged, so the variant is picked from the shape of the JSON
/// value (boolean, string, array of strings, or object) rather than from an
/// explicit tag. Variant order matters for untagged deserialization, so keep
/// it stable.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)]
pub enum Source {
// A JSON boolean.
Bool(bool),
// A single JSON string.
String(String),
// A JSON array of strings.
List(Vec<String>),
// A JSON object with optional `includes` / `excludes` lists.
Object(SourceObject),
}

#[derive(Debug, Default, Clone, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
pub struct SearchBody {
#[serde(default)]
pub _source: Option<Source>,
#[serde(default)]
pub docvalue_fields: Option<serde_json::Value>,
#[serde(default)]
pub from: Option<u64>,
#[serde(default)]
Expand All @@ -86,6 +105,12 @@ pub struct SearchBody {
pub stored_fields: Option<BTreeSet<String>>,
#[serde(default)]
pub search_after: Vec<serde_json::Value>,
#[serde(default)]
pub script_fields: Option<serde_json::Value>,
#[serde(default)]
pub highlight: Option<serde_json::Value>,
#[serde(default)]
pub version: Option<bool>,
}

struct FieldSortVecVisitor;
Expand Down Expand Up @@ -265,8 +290,9 @@ mod tests {
let error_msg = search_body.unwrap_err().to_string();
assert!(error_msg.contains("unknown field `term`"));
assert!(error_msg.contains(
"expected one of `from`, `size`, `query`, `sort`, `aggs`, `track_total_hits`, \
`stored_fields`, `search_after`"
"expected one of `_source`, `docvalue_fields`, `from`, `size`, `query`, `sort`, \
`aggs`, `track_total_hits`, `stored_fields`, `search_after`, `script_fields`, \
`highlight`, `version`"
));
}
}

0 comments on commit 379a752

Please sign in to comment.