Skip to content

Commit

Permalink
parser: add separate grammar for > and < date operators
Browse files Browse the repository at this point in the history
  • Loading branch information
MJedr committed Oct 6, 2022
1 parent c8e1a35 commit ab62b4e
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 89 deletions.
8 changes: 8 additions & 0 deletions inspire_query_parser/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,14 @@ class GreaterThanOp(UnaryOp):
pass


class GreaterThanDateOp(UnaryOp):
    """AST node for a strict greater-than comparison whose operand is a date."""


class LessThanDateOp(UnaryOp):
    """AST node for a strict less-than comparison whose operand is a date."""


class LessThanOp(UnaryOp):
    """AST node for a strict less-than comparison on a single operand."""

Expand Down
28 changes: 22 additions & 6 deletions inspire_query_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ class SimpleValueWithColonUnit(SimpleValueUnit):


class SimpleDateValueUnit(LeafRule):
grammar = re.compile(r"[\d*\-\.\/]{4,10}(?=($|\s|\)))", re.UNICODE)
grammar = re.compile(r"[\d*\-\.\/\_]{1,10}(?=($|\s|\)))", re.UNICODE)
date_specifiers_regex = re.compile(r"({})\s*(-\s*\d+)?".format('|'.join(DATE_SPECIFIERS_COLLECTION)), re.UNICODE)
string_month_date_regex = re.compile(MONTH_REGEX, re.IGNORECASE)

Expand Down Expand Up @@ -555,6 +555,7 @@ def parse(cls, parser, text, pos):
GreaterEqualOp,
LessEqualOp,
GreaterThanOp,
GreaterThanDateOp,
LessThanOp,
ComplexValue
]
Expand Down Expand Up @@ -600,7 +601,6 @@ def parse(cls, parser, text, pos):
SimpleValueNegation,
SimpleValue,
SimpleDateValueNegation,
SimpleDateValue,
]
)

Expand Down Expand Up @@ -652,7 +652,15 @@ class GreaterThanOp(UnaryRule):
Supports queries like author-count > 2000 or date after 10-2000.
"""
grammar = omit(re.compile(r"after|>", re.IGNORECASE)), attr('op', [SimpleDateValue, SimpleValue])
grammar = omit(re.compile(r">", re.IGNORECASE)), attr('op', [SimpleValue])


class GreaterThanDateOp(UnaryRule):
    """Greater than operator restricted to date operands.

    Matches ``after`` or ``>`` (case-insensitively) followed by a
    ``SimpleDateValue``, which is stored in the ``op`` attribute.
    Supports queries like date > 2000-10 or date after 10-2000.
    """
    grammar = omit(re.compile(r"after|>", re.IGNORECASE)), attr('op', [SimpleDateValue])


class GreaterEqualOp(UnaryRule):
Expand All @@ -673,7 +681,15 @@ class LessThanOp(UnaryRule):
Supports queries like author-count < 100 or date before 1984.
"""
grammar = omit(re.compile(r"before|<", re.IGNORECASE)), attr('op', [SimpleDateValue, SimpleValue])
grammar = omit(re.compile(r"<", re.IGNORECASE)), attr('op', [SimpleValue])


class LessThanDateOp(UnaryRule):
    """Less than operator restricted to date operands.

    Matches ``before`` or ``<`` (case-insensitively) followed by a
    ``SimpleDateValue``, which is stored in the ``op`` attribute.
    Supports queries like date < 2000-12 or date before 1984.
    """
    grammar = omit(re.compile(r"before|<", re.IGNORECASE)), attr('op', [SimpleDateValue])


class LessEqualOp(UnaryRule):
Expand Down Expand Up @@ -740,8 +756,8 @@ class DateValue(UnaryRule):
(optional(omit(Literal("="))), RangeOp),
GreaterEqualOp,
LessEqualOp,
GreaterThanOp,
LessThanOp,
GreaterThanDateOp,
LessThanDateOp,
(
optional(omit(Literal("="))),
[
Expand Down
6 changes: 6 additions & 0 deletions inspire_query_parser/visitors/elastic_search_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,12 +649,18 @@ def visit_range_op(self, node, fieldnames):
def visit_greater_than_op(self, node, fieldnames):
    """Return range queries over ``fieldnames`` with a ``gt`` bound equal to the operand value."""
    target_fields = force_list(fieldnames)
    return self._generate_range_queries(target_fields, {'gt': node.op.value})

def visit_greater_than_date_op(self, node, fieldnames):
    """Return range queries over ``fieldnames`` with a ``gt`` bound equal to the date operand value."""
    target_fields = force_list(fieldnames)
    return self._generate_range_queries(target_fields, {'gt': node.op.value})

def visit_greater_equal_than_op(self, node, fieldnames):
    """Return range queries over ``fieldnames`` with a ``gte`` bound equal to the operand value."""
    target_fields = force_list(fieldnames)
    return self._generate_range_queries(target_fields, {'gte': node.op.value})

def visit_less_than_op(self, node, fieldnames):
    """Return range queries over ``fieldnames`` with an ``lt`` bound equal to the operand value."""
    target_fields = force_list(fieldnames)
    return self._generate_range_queries(target_fields, {'lt': node.op.value})

def visit_less_than_date_op(self, node, fieldnames):
    """Return range queries over ``fieldnames`` with an ``lt`` bound equal to the date operand value."""
    target_fields = force_list(fieldnames)
    return self._generate_range_queries(target_fields, {'lt': node.op.value})

def visit_less_equal_than_op(self, node, fieldnames):
    """Return range queries over ``fieldnames`` with an ``lte`` bound equal to the operand value."""
    target_fields = force_list(fieldnames)
    return self._generate_range_queries(target_fields, {'lte': node.op.value})

Expand Down
6 changes: 6 additions & 0 deletions inspire_query_parser/visitors/restructuring_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,9 @@ def visit_value(self, node):
def visit_range_op(self, node):
    """Convert a parser range node into ``ast.RangeOp`` by visiting its left and right bounds."""
    lower_bound = node.left.accept(self)
    upper_bound = node.right.accept(self)
    return ast.RangeOp(lower_bound, upper_bound)

def visit_greater_than_date_op(self, node):
    """Convert a parser greater-than-date node into ``ast.GreaterThanDateOp`` with its operand visited."""
    operand = node.op.accept(self)
    return ast.GreaterThanDateOp(operand)

def visit_greater_than_op(self, node):
    """Convert a parser greater-than node into ``ast.GreaterThanOp`` with its operand visited."""
    operand = node.op.accept(self)
    return ast.GreaterThanOp(operand)

Expand All @@ -279,6 +282,9 @@ def visit_greater_equal_op(self, node):
return ast.GreaterEqualThanOp(value)

def visit_less_than_op(self, node):
    """Convert a parser less-than node into ``ast.LessThanOp`` with its operand visited.

    The generic (non-date) rule must map to the generic AST node, mirroring
    how ``visit_greater_than_op`` maps to ``ast.GreaterThanOp``; previously
    this returned the date-specific ``ast.LessThanDateOp`` by mistake.
    """
    return ast.LessThanOp(node.op.accept(self))

def visit_less_than_date_op(self, node):
    """Convert a parser less-than-date node into ``ast.LessThanDateOp`` with its operand visited.

    The date-specific rule must map to the date-specific AST node, mirroring
    how ``visit_greater_than_date_op`` maps to ``ast.GreaterThanDateOp``;
    previously this returned the generic ``ast.LessThanOp`` by mistake.
    """
    return ast.LessThanDateOp(node.op.accept(self))

def visit_less_equal_op(self, node):
Expand Down
18 changes: 18 additions & 0 deletions tests/test_elastic_search_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3235,3 +3235,21 @@ def test_elastic_search_visitor_complex_query():
}
generated_es_query = _parse_query(query_str)
assert generated_es_query == expected_es_query


def test_elastic_search_visitor_regression_greater_than_for_non_date():
    """A title query starting with "after" must be matched as plain text, not as a date operator."""
    expected_es_query = {
        "match": {
            "titles.full_title": {
                "query": "after something",
                "operator": "and",
            }
        }
    }
    assert _parse_query("t after something") == expected_es_query


def test_elastic_search_visitor_regression_less_than_for_non_date():
    """A title query starting with "before" must be matched as plain text, not as a date operator."""
    expected_es_query = {
        "match": {
            "titles.full_title": {
                "query": "before something",
                "operator": "and",
            }
        }
    }
    assert _parse_query("t before something") == expected_es_query
136 changes: 76 additions & 60 deletions tests/test_parser_functionality.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from inspire_query_parser.parser import (And, BooleanQuery, ComplexValue,
DateValue, EmptyQuery, Expression,
GreaterEqualOp, GreaterThanOp,
GreaterThanDateOp,
InspireDateKeyword, InspireKeyword,
InvenioKeywordQuery, LessEqualOp,
LessThanOp, MalformedQueryWords,
Expand Down Expand Up @@ -1762,64 +1763,6 @@
("", Query([EmptyQuery()])),
(" ", Query([EmptyQuery()])),
# G, GE, LT, LE, E queries
(
"date > 2000-10 and < 2000-12",
Query(
[
Statement(
BooleanQuery(
Expression(
SimpleQuery(
SpiresDateKeywordQuery(
InspireDateKeyword("date"),
DateValue(
GreaterThanOp(SimpleDateValue("2000-10"))
),
)
)
),
And(),
Statement(
Expression(
SimpleQuery(
Value(LessThanOp(SimpleDateValue("2000-12")))
)
)
),
)
)
]
),
),
(
"date after 10/2000 and before 2000-12",
Query(
[
Statement(
BooleanQuery(
Expression(
SimpleQuery(
SpiresDateKeywordQuery(
InspireDateKeyword("date"),
DateValue(
GreaterThanOp(SimpleDateValue("10/2000"))
),
)
)
),
And(),
Statement(
Expression(
SimpleQuery(
Value(LessThanOp(SimpleDateValue("2000-12")))
)
)
),
)
)
]
),
),
(
"date >= nov 2000 and d<=2005",
Query(
Expand Down Expand Up @@ -2070,7 +2013,7 @@
SpiresDateKeywordQuery(
InspireDateKeyword("date-updated"),
DateValue(
GreaterThanOp(SimpleDateValue("yesterday - 2"))
GreaterThanDateOp(SimpleDateValue("yesterday - 2"))
),
)
)
Expand Down Expand Up @@ -2112,7 +2055,7 @@
SpiresDateKeywordQuery(
InspireDateKeyword("date"),
DateValue(
GreaterThanOp(
GreaterThanDateOp(
SimpleDateValue("2013")
)
),
Expand Down Expand Up @@ -2335,3 +2278,76 @@ def test_parser_functionality(query_str, expected_parse_tree):
parser = StatefulParser()
_, parse_tree = parser.parse(query_str, Query)
assert parse_tree == expected_parse_tree


@pytest.mark.parametrize(
    ["query_str", "expected_parse_tree"],
    # A list (not a set literal) keeps the case order and the generated test
    # IDs deterministic, and avoids hashing or de-duplicating parse trees.
    [
        (
            "date > 2000-10 and < 2000-12",
            Query(
                [
                    Statement(
                        BooleanQuery(
                            Expression(
                                SimpleQuery(
                                    SpiresDateKeywordQuery(
                                        InspireDateKeyword("date"),
                                        DateValue(
                                            GreaterThanOp(SimpleDateValue("2000-10"))
                                        ),
                                    )
                                )
                            ),
                            And(),
                            Statement(
                                Expression(
                                    SimpleQuery(
                                        Value(LessThanOp(SimpleDateValue("2000-12")))
                                    )
                                )
                            ),
                        )
                    )
                ]
            ),
        ),
        (
            "date after 10/2000 and before 2000-12",
            Query(
                [
                    Statement(
                        BooleanQuery(
                            Expression(
                                SimpleQuery(
                                    SpiresDateKeywordQuery(
                                        InspireDateKeyword("date"),
                                        DateValue(
                                            GreaterThanOp(SimpleDateValue("10/2000"))
                                        ),
                                    )
                                )
                            ),
                            And(),
                            Statement(
                                Expression(
                                    SimpleQuery(
                                        Value(LessThanOp(SimpleDateValue("2000-12")))
                                    )
                                )
                            ),
                        )
                    )
                ]
            ),
        ),
    ],
)
@pytest.mark.xfail(
    reason="the queries are not correct, should be fixed by https://github.com/cern-sis/issues-inspire/issues/150 "
)
def test_parser_functionality_regressions(query_str, expected_parse_tree):
    """Parse known-problematic date comparison queries and compare with the desired tree.

    Marked ``xfail``: the expected trees describe the intended result, which
    the parser does not yet produce (see the issue linked in the reason).
    """
    print("Parsing: " + query_str)
    parser = StatefulParser()
    # StatefulParser.parse returns a pair; only the parse tree is compared.
    _, parse_tree = parser.parse(query_str, Query)
    assert parse_tree == expected_parse_tree
41 changes: 18 additions & 23 deletions tests/test_restructuring_visitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from inspire_query_parser import parser
from inspire_query_parser.ast import (AndOp, EmptyQuery, ExactMatchValue,
GreaterEqualThanOp, GreaterThanOp,
GreaterThanDateOp,LessThanDateOp,
Keyword, KeywordOp, LessEqualThanOp,
LessThanOp, MalformedQuery,
NestedKeywordOp, NotOp, OrOp,
Expand Down Expand Up @@ -354,17 +355,11 @@
# G, GE, LT, LE, E queries
(
'date > 2000-10 and date < 2000-12',
AndOp(
KeywordOp(Keyword('date'), GreaterThanOp(Value('2000-10'))),
KeywordOp(Keyword('date'), LessThanOp(Value('2000-12')))
)
AndOp(KeywordOp(Keyword('date'), GreaterThanDateOp(Value('2000-10'))), KeywordOp(Keyword('date'), LessThanOp(Value('2000-12'))))
),
(
'date after 10/2000 and date before 2000-12',
AndOp(
KeywordOp(Keyword('date'), GreaterThanOp(Value('10/2000'))),
KeywordOp(Keyword('date'), LessThanOp(Value('2000-12')))
)
AndOp(KeywordOp(Keyword('date'), GreaterThanDateOp(Value('10/2000'))), KeywordOp(Keyword('date'), LessThanOp(Value('2000-12'))))
),
(
'date >= nov 2000 and d<=2005',
Expand Down Expand Up @@ -445,23 +440,11 @@
'du > yesterday - 2',
KeywordOp(
Keyword('date-updated'),
GreaterThanOp(Value(str((date.today() - relativedelta(days=3)))))
GreaterThanDateOp(Value(str((date.today() - relativedelta(days=3)))))
)
),
# Wildcard queries
(
'find a \'o*aigh\' and t "alge*" and date >2013',
AndOp(
KeywordOp(Keyword('author'), PartialMatchValue('o*aigh', contains_wildcard=True)),
AndOp(
KeywordOp(Keyword('title'), ExactMatchValue('alge*'
)),
KeywordOp(Keyword('date'), GreaterThanOp(Value('2013')))
)
)
),
(
'a *alge | a alge* | a o*aigh',
OrOp(
Expand All @@ -476,7 +459,19 @@
'find texkey Hirata:1992*',
KeywordOp(Keyword('texkeys'), Value('Hirata:1992*', contains_wildcard=True))
),
(
"find a 'o*aigh' and t \"alge*\" and date >2013",
AndOp(
KeywordOp(
Keyword("author"),
PartialMatchValue("o*aigh", contains_wildcard=True),
),
AndOp(
KeywordOp(Keyword("title"), ExactMatchValue("alge*")),
KeywordOp(Keyword("date"), GreaterThanDateOp(Value("2013"))),
),
),
),
# Queries for implicit "and" removal
('title and foo', AndOp(ValueOp(Value('title')), ValueOp(Value('foo')))),
('author takumi doi', KeywordOp(Keyword('author'), Value('takumi doi'))),
Expand Down Expand Up @@ -711,7 +706,7 @@ def test_foo_bar():
)
),
('find cc italy', KeywordOp(Keyword('country'), Value('italy'))),
('fin date > today', KeywordOp(Keyword('date'), GreaterThanOp(Value(str(date.today()))))),
('fin date > today', KeywordOp(Keyword('date'), GreaterThanDateOp(Value(str(date.today()))))),
('find r atlas-conf-*', KeywordOp(Keyword('reportnumber'), Value('atlas-conf-*', contains_wildcard=True))),
(
'find caption "Diagram for the fermion flow violating process"',
Expand Down

0 comments on commit ab62b4e

Please sign in to comment.