Skip to content

Commit

Permalink
add filter operators: is-present, is-absent
Browse files Browse the repository at this point in the history
e.g. `cardSearchFilter[funder][is-present]`
  • Loading branch information
aaxelb committed Aug 15, 2023
1 parent ec3fc80 commit 0b8b5df
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 20 deletions.
36 changes: 36 additions & 0 deletions share/search/index_strategy/trove_indexcard.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,10 @@ def _cardsearch_query(
_bool_query['must_not'].append(self._cardsearch_iri_filter(_searchfilter))
elif _searchfilter.operator == SearchFilter.FilterOperator.ANY_OF:
_bool_query['filter'].append(self._cardsearch_iri_filter(_searchfilter))
elif _searchfilter.operator == SearchFilter.FilterOperator.IS_PRESENT:
_bool_query['filter'].append(self._cardsearch_presence_filter(_searchfilter))
elif _searchfilter.operator == SearchFilter.FilterOperator.IS_ABSENT:
_bool_query['must_not'].extend(self._cardsearch_presence_queries(_searchfilter))
elif _searchfilter.operator.is_date_operator():
_bool_query['filter'].append(self._cardsearch_date_filter(_searchfilter))
else:
Expand Down Expand Up @@ -696,6 +700,38 @@ def _valuesearch_date_result(self, date_bucket):
match_count=date_bucket['doc_count'],
)

def _cardsearch_presence_filter(self, search_filter) -> dict:
return {'bool': {
'minimum_should_match': 1,
'should': self._cardsearch_presence_queries(search_filter),
}}

def _cardsearch_presence_queries(self, search_filter) -> list[dict]:
    """Build one `nested` term query per nested field for a property path.

    The filter's `property_path` is converted to a keyword via
    `iri_path_as_keyword(..., suffuniq=True)`, and the same keyword is
    matched against the `suffuniq_path_from_focus` subfield of each of
    `nested_iri`, `nested_date`, and `nested_text` (in that order).
    """
    _suffuniq_path = iri_path_as_keyword(
        search_filter.property_path,
        suffuniq=True,
    )
    # (nested path, keyword field within that nested path)
    _nested_path_fields = (
        ('nested_iri', 'nested_iri.suffuniq_path_from_focus'),
        ('nested_date', 'nested_date.suffuniq_path_from_focus'),
        ('nested_text', 'nested_text.suffuniq_path_from_focus'),
    )
    return [
        {'nested': {
            'path': _nested_path,
            'query': {'term': {_path_field: _suffuniq_path}},
        }}
        for _nested_path, _path_field in _nested_path_fields
    ]

def _cardsearch_iri_filter(self, search_filter) -> dict:
return {'nested': {
'path': 'nested_iri',
Expand Down
26 changes: 16 additions & 10 deletions share/search/search_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,8 @@ class FilterOperator(enum.Enum):
# TODO: use iris from TROVE IriNamespace
ANY_OF = 'any-of'
NONE_OF = 'none-of'
IS_PRESENT = 'is-present'
IS_ABSENT = 'is-absent'
BEFORE = 'before'
AFTER = 'after'
AT_DATE = 'at-date'
Expand All @@ -146,6 +148,9 @@ def is_date_operator(self):
def is_iri_operator(self):
    """Return whether this operator is `ANY_OF` or `NONE_OF`."""
    _iri_operators = (self.ANY_OF, self.NONE_OF)
    return self in _iri_operators

def is_valueless_operator(self):
    """Return whether this operator is `IS_PRESENT` or `IS_ABSENT`."""
    _valueless_operators = (self.IS_PRESENT, self.IS_ABSENT)
    return self in _valueless_operators

property_path: tuple[str]
value_set: frozenset[str]
operator: FilterOperator
Expand Down Expand Up @@ -179,7 +184,7 @@ def from_filter_param(cls, param_name: QueryparamName, param_value: str):
except ValueError:
raise ValueError(f'unrecognized search-filter operator "{_operator_value}"')
_propertypath = tuple(
osfmap_labeler.iri_for_label(_pathstep)
osfmap_labeler.iri_for_label(_pathstep, default=_pathstep)
for _pathstep in split_queryparam_value(_serialized_path)
)
_is_date_property = is_date_property(_propertypath[-1])
Expand All @@ -192,16 +197,17 @@ def from_filter_param(cls, param_name: QueryparamName, param_value: str):
if _operator.is_date_operator() and not _is_date_property:
raise ValueError(f'cannot use date operator {_operator.value} on non-date property')
_value_list = []
for _value in split_queryparam_value(param_value):
if _is_date_property:
_value_list.append(_value) # TODO: vali-date
else:
try:
_iri = osfmap_labeler.iri_for_label(_value)
except KeyError: # not a known shorthand
_value_list.append(_value) # assume iri already
if not _operator.is_valueless_operator():
for _value in split_queryparam_value(param_value):
if _is_date_property:
_value_list.append(_value) # TODO: vali-date
else:
_value_list.append(_iri)
try:
_iri = osfmap_labeler.iri_for_label(_value)
except KeyError: # not a known shorthand
_value_list.append(_value) # assume iri already
else:
_value_list.append(_iri)
return cls(
property_path=_propertypath,
value_set=frozenset(_value_list),
Expand Down
19 changes: 10 additions & 9 deletions trove/trovesearch_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,15 +226,16 @@ def gather_card(focus, **kwargs):

def _filter_as_blanknode(search_filter, valueinfo_by_iri) -> frozenset:
_filtervalue_twoples = []
for _value in search_filter.value_set:
if search_filter.operator.is_iri_operator():
_valueinfo = (
valueinfo_by_iri.get(_value)
or _literal_json({'@id': _value})
)
else:
_valueinfo = _literal_json({'@value': _value})
_filtervalue_twoples.append((TROVE.filterValue, _valueinfo))
if not search_filter.operator.is_valueless_operator():
for _value in search_filter.value_set:
if search_filter.operator.is_iri_operator():
_valueinfo = (
valueinfo_by_iri.get(_value)
or _literal_json({'@id': _value})
)
else:
_valueinfo = _literal_json({'@value': _value})
_filtervalue_twoples.append((TROVE.filterValue, _valueinfo))
return frozenset((
(TROVE.propertyPath, _literal_json(search_filter.property_path)),
(TROVE.osfmapPropertyPath, _osfmap_path(search_filter.property_path)),
Expand Down
4 changes: 3 additions & 1 deletion trove/util/iri_labeler.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,13 @@ def all_labels_by_iri(self) -> dict[str, str]:
self.__labels_by_iri = _labels_by_iri
return _labels_by_iri

def iri_for_label(self, label: str) -> str:
def iri_for_label(self, label: str, *, default=None) -> str:
    """Look up the iri registered under a label.

    If `label` starts with one of `self.acceptable_prefixes`, that prefix
    is removed before the lookup in `self.all_iris_by_label()`.

    :param label: label to resolve, optionally carrying an acceptable prefix
    :param default: value returned when the label is unknown; when left as
        `None`, an unknown label raises instead
    :returns: the iri found for the label, or `default`
    :raises KeyError: if the label is unknown and no default was given
    """
    _labelkey = label
    for _prefix in self.acceptable_prefixes:
        if label.startswith(_prefix):
            _labelkey = label[len(_prefix):]  # remove prefix
    _iris_by_label = self.all_iris_by_label()
    # compare against None rather than truthiness, so a falsy default
    # (such as an empty string) is honored instead of raising KeyError
    if default is not None:
        return _iris_by_label.get(_labelkey, default)
    return _iris_by_label[_labelkey]  # may raise KeyError

def label_for_iri(self, iri: str) -> str:
Expand Down

0 comments on commit 0b8b5df

Please sign in to comment.