Skip to content

Commit

Permalink
fix negative tag (#4764)
Browse files Browse the repository at this point in the history
* disable tag prunning with negative tag

* add regression test for negative tags
  • Loading branch information
trinity-1686a authored Mar 22, 2024
1 parent f509e0e commit 36577e2
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 23 deletions.
41 changes: 18 additions & 23 deletions quickwit/quickwit-doc-mapper/src/tag_pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,11 +145,7 @@ enum UnsimplifiedTagFilterAst {
enum TermFilterAst {
And(Vec<TermFilterAst>),
Or(Vec<TermFilterAst>),
Term {
is_present: bool,
field: String,
value: String,
},
Term { field: String, value: String },
}

/// Records terms into a set of tags.
Expand Down Expand Up @@ -266,12 +262,18 @@ fn simplify_ast(ast: UnsimplifiedTagFilterAst) -> Option<TermFilterAst> {
is_present,
field,
value,
} => TermFilterAst::Term {
is_present,
field,
value,
} => {
if is_present {
Some(TermFilterAst::Term { field, value })
} else {
// we can't do tag pruning on negative filters. If `field` can be one of 1 or 2,
// and we search for not(1), we don't want to remove a split where
// tags=[1,2] (which is_present: false does). It's even more problematic if some
// documents have `field` unset, because we don't record that at all, so can't
// even reject a split based on it having tags=[1].
None
}
}
.into(),
UnsimplifiedTagFilterAst::Uninformative => None,
}
}
Expand All @@ -294,17 +296,13 @@ fn expand_to_tag_ast(terms_filter_ast: TermFilterAst) -> TagFilterAst {
TermFilterAst::Or(children) => {
TagFilterAst::Or(children.into_iter().map(expand_to_tag_ast).collect())
}
TermFilterAst::Term {
is_present,
field,
value,
} => {
TermFilterAst::Term { field, value } => {
let field_is_tag = TagFilterAst::Tag {
is_present: false,
tag: field_tag(&field),
};
let term_tag = TagFilterAst::Tag {
is_present,
is_present: true,
tag: term_tag(&field, &value),
};
TagFilterAst::Or(vec![field_is_tag, term_tag])
Expand Down Expand Up @@ -470,21 +468,18 @@ mod test {

#[test]
fn test_disjunction_of_tag_disjunction_with_not_clause() {
assert_eq!(
extract_tags_from_query_helper("(user:bart -lang:fr)")
.unwrap()
.to_string(),
"((¬user! ∨ user:bart) ∨ (¬lang! ∨ ¬lang:fr))"
);
// ORed negative tags make the result inconclusive. See simplify_ast() for details
assert!(extract_tags_from_query_helper("(user:bart -lang:fr)").is_none());
}

#[test]
fn test_disjunction_of_tag_conjunction_with_not_clause() {
// negative tags are removed from AND clauses. See simplify_ast() for details
assert_eq!(
&extract_tags_from_query_helper("user:bart AND NOT lang:fr")
.unwrap()
.to_string(),
"(¬user! ∨ user:bart) ∧ (¬lang! ∨ ¬lang:fr)"
"(¬user! ∨ user:bart)"
);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# regression test for https://github.com/quickwit-oss/quickwit/issues/4698
endpoint: tagged/search
params:
query: "tag:1"
expected:
num_hits: 3
---
endpoint: tagged/search
params:
query: "-tag:2"
expected:
num_hits: 4
---
endpoint: tagged/search
params:
query: "tag:2"
expected:
num_hits: 1
---
endpoint: tagged/search
params:
query: "-tag:1"
expected:
num_hits: 2
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,42 @@ ndjson:
- {"ts": 1684993004, "not_fast": 1684993004}
# a missing timestamp
- {"not_fast": 1684993003}
---
method: DELETE
endpoint: indexes/tagged
status_code: null
---
method: POST
endpoint: indexes/
json:
version: "0.7"
index_id: tagged
doc_mapping:
field_mappings:
- name: seq
type: u64
- name: tag
type: u64
tag_fields: ["tag"]
---
method: POST
endpoint: tagged/ingest
params:
commit: force
ndjson:
- {"seq": 1, "tag": 1}
- {"seq": 2, "tag": 2}
---method: POST
endpoint: tagged/ingest
params:
commit: force
ndjson:
- {"seq": 1, "tag": 1}
- {"seq": 3, "tag": null}
---
method: POST
endpoint: tagged/ingest
params:
commit: force
ndjson:
- {"seq": 4, "tag": 1}
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Delete index
method: DELETE
endpoint: indexes/simple
---
method: DELETE
endpoint: indexes/tagged

0 comments on commit 36577e2

Please sign in to comment.