From 36577e26f0d25b17ca98d475cb96fa30072c7c44 Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Fri, 22 Mar 2024 10:46:13 +0100 Subject: [PATCH] fix negative tag (#4764) * disable tag prunning with negative tag * add regression test for negative tags --- .../quickwit-doc-mapper/src/tag_pruning.rs | 41 ++++++++----------- .../qw_search_api/0002_negative_tags.yaml | 24 +++++++++++ .../qw_search_api/_setup.quickwit.yaml | 39 ++++++++++++++++++ .../qw_search_api/_teardown.quickwit.yaml | 3 ++ 4 files changed, 84 insertions(+), 23 deletions(-) create mode 100644 quickwit/rest-api-tests/scenarii/qw_search_api/0002_negative_tags.yaml diff --git a/quickwit/quickwit-doc-mapper/src/tag_pruning.rs b/quickwit/quickwit-doc-mapper/src/tag_pruning.rs index 5636ddc7cc6..08ece78f029 100644 --- a/quickwit/quickwit-doc-mapper/src/tag_pruning.rs +++ b/quickwit/quickwit-doc-mapper/src/tag_pruning.rs @@ -145,11 +145,7 @@ enum UnsimplifiedTagFilterAst { enum TermFilterAst { And(Vec), Or(Vec), - Term { - is_present: bool, - field: String, - value: String, - }, + Term { field: String, value: String }, } /// Records terms into a set of tags. @@ -266,12 +262,18 @@ fn simplify_ast(ast: UnsimplifiedTagFilterAst) -> Option { is_present, field, value, - } => TermFilterAst::Term { - is_present, - field, - value, + } => { + if is_present { + Some(TermFilterAst::Term { field, value }) + } else { + // we can't do tag pruning on negative filters. If `field` can be one of 1 or 2, + // and we search for not(1), we don't want to remove a split where + // tags=[1,2] (which is_present: false does). It's even more problematic if some + // documents have `field` unset, because we don't record that at all, so can't + // even reject a split based on it having tags=[1]. + None + } } - .into(), UnsimplifiedTagFilterAst::Uninformative => None, } } @@ -294,17 +296,13 @@ fn expand_to_tag_ast(terms_filter_ast: TermFilterAst) -> TagFilterAst { TermFilterAst::Or(children) => { TagFilterAst::Or(children.into_iter().map(expand_to_tag_ast).collect()) } - TermFilterAst::Term { - is_present, - field, - value, - } => { + TermFilterAst::Term { field, value } => { let field_is_tag = TagFilterAst::Tag { is_present: false, tag: field_tag(&field), }; let term_tag = TagFilterAst::Tag { - is_present, + is_present: true, tag: term_tag(&field, &value), }; TagFilterAst::Or(vec![field_is_tag, term_tag]) @@ -470,21 +468,18 @@ mod test { #[test] fn test_disjunction_of_tag_disjunction_with_not_clause() { - assert_eq!( - extract_tags_from_query_helper("(user:bart -lang:fr)") - .unwrap() - .to_string(), - "((¬user! ∨ user:bart) ∨ (¬lang! ∨ ¬lang:fr))" - ); + // ORed negative tags make the result inconclusive. See simplify_ast() for details + assert!(extract_tags_from_query_helper("(user:bart -lang:fr)").is_none()); } #[test] fn test_disjunction_of_tag_conjunction_with_not_clause() { + // negative tags are removed from AND clauses. See simplify_ast() for details assert_eq!( &extract_tags_from_query_helper("user:bart AND NOT lang:fr") .unwrap() .to_string(), - "(¬user! ∨ user:bart) ∧ (¬lang! ∨ ¬lang:fr)" + "(¬user! ∨ user:bart)" ); } diff --git a/quickwit/rest-api-tests/scenarii/qw_search_api/0002_negative_tags.yaml b/quickwit/rest-api-tests/scenarii/qw_search_api/0002_negative_tags.yaml new file mode 100644 index 00000000000..0b716eb9a1b --- /dev/null +++ b/quickwit/rest-api-tests/scenarii/qw_search_api/0002_negative_tags.yaml @@ -0,0 +1,24 @@ +# regression test for https://github.com/quickwit-oss/quickwit/issues/4698 +endpoint: tagged/search +params: + query: "tag:1" +expected: + num_hits: 3 +--- +endpoint: tagged/search +params: + query: "-tag:2" +expected: + num_hits: 4 +--- +endpoint: tagged/search +params: + query: "tag:2" +expected: + num_hits: 1 +--- +endpoint: tagged/search +params: + query: "-tag:1" +expected: + num_hits: 2 diff --git a/quickwit/rest-api-tests/scenarii/qw_search_api/_setup.quickwit.yaml b/quickwit/rest-api-tests/scenarii/qw_search_api/_setup.quickwit.yaml index 1079499d7bb..84270d975b4 100644 --- a/quickwit/rest-api-tests/scenarii/qw_search_api/_setup.quickwit.yaml +++ b/quickwit/rest-api-tests/scenarii/qw_search_api/_setup.quickwit.yaml @@ -43,3 +43,42 @@ ndjson: - {"ts": 1684993004, "not_fast": 1684993004} # a missing timestamp - {"not_fast": 1684993003} +--- +method: DELETE +endpoint: indexes/tagged +status_code: null +--- +method: POST +endpoint: indexes/ +json: + version: "0.7" + index_id: tagged + doc_mapping: + field_mappings: + - name: seq + type: u64 + - name: tag + type: u64 + tag_fields: ["tag"] +--- +method: POST +endpoint: tagged/ingest +params: + commit: force +ndjson: + - {"seq": 1, "tag": 1} + - {"seq": 2, "tag": 2} +---method: POST +endpoint: tagged/ingest +params: + commit: force +ndjson: + - {"seq": 1, "tag": 1} + - {"seq": 3, "tag": null} +--- +method: POST +endpoint: tagged/ingest +params: + commit: force +ndjson: + - {"seq": 4, "tag": 1} diff --git a/quickwit/rest-api-tests/scenarii/qw_search_api/_teardown.quickwit.yaml b/quickwit/rest-api-tests/scenarii/qw_search_api/_teardown.quickwit.yaml index 5d91806a899..a2896f541b1 100644 --- a/quickwit/rest-api-tests/scenarii/qw_search_api/_teardown.quickwit.yaml +++ b/quickwit/rest-api-tests/scenarii/qw_search_api/_teardown.quickwit.yaml @@ -1,3 +1,6 @@ # Delete index method: DELETE endpoint: indexes/simple +--- +method: DELETE +endpoint: indexes/tagged