def _cardsearch_presence_filter(self, search_filter) -> dict:
    """Build a bool query that matches when any presence query matches.

    Wraps the queries from `_cardsearch_presence_queries` in a `should`
    clause with `minimum_should_match` of 1.
    """
    _presence_queries = self._cardsearch_presence_queries(search_filter)
    return {
        'bool': {
            'minimum_should_match': 1,
            'should': _presence_queries,
        },
    }


def _cardsearch_presence_queries(self, search_filter) -> list[dict]:
    """Build one nested term query per nested field for the filter's path.

    Each query matches the `suffuniq_path_from_focus` keyword of the
    `nested_iri`, `nested_date` and `nested_text` fields (in that order)
    against the keyword form of `search_filter.property_path`.
    """
    # NOTE(review): iri_path_as_keyword is defined elsewhere; presumably it
    # serializes the property path into the indexed keyword form -- confirm.
    _path_keyword = iri_path_as_keyword(
        search_filter.property_path,
        suffuniq=True,
    )
    return [
        {'nested': {
            'path': _nested_field,
            'query': {'term': {
                f'{_nested_field}.suffuniq_path_from_focus': _path_keyword,
            }},
        }}
        for _nested_field in ('nested_iri', 'nested_date', 'nested_text')
    ]
def is_valueless_operator(self):
    """Return True when this operator is IS_PRESENT or IS_ABSENT."""
    _valueless_operators = (self.IS_PRESENT, self.IS_ABSENT)
    return self in _valueless_operators
def iri_for_label(self, label: str, *, default=None) -> str:
    """Look up the IRI registered for a label.

    Any prefix from `self.acceptable_prefixes` that the label starts with
    is stripped before the lookup in `self.all_iris_by_label()`.

    Args:
        label: the label to resolve, with or without an acceptable prefix.
        default: value to return when the label is unknown. `None` (the
            default) means "no fallback".

    Returns:
        The IRI for the label, or `default` when the label is unknown and
        a non-None default was given.

    Raises:
        KeyError: if the label is unknown and `default` is None.
    """
    _labelkey = label
    for _prefix in self.acceptable_prefixes:
        if label.startswith(_prefix):
            _labelkey = label[len(_prefix):]  # remove prefix
    # compare against None rather than truthiness, so an explicitly given
    # falsy default (e.g. an empty string) is still honored
    if default is not None:
        return self.all_iris_by_label().get(_labelkey, default)
    return self.all_iris_by_label()[_labelkey]  # may raise KeyError