Skip to content

Commit

Permalink
de-index items without title/name/label
Browse files: browse the repository at this point in the history
  • Loading branch information
aaxelb committed Aug 16, 2023
1 parent f9142ab commit ddffd6f
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions share/search/index_strategy/trove_indexcard.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -152,6 +152,8 @@ def index_mappings(self):

def _build_sourcedoc(self, indexcard_rdf):
_rdfdoc = primitive_rdf.TripledictWrapper(indexcard_rdf.as_rdf_tripledict())
if not any(_rdfdoc.q(indexcard_rdf.focus_iri, NAMELIKE_PROPERTIES)):
return None # skip cards without some value for name/title/label
_nested_iris = defaultdict(set)
_nested_dates = defaultdict(set)
_nested_texts = defaultdict(set)
Expand Down Expand Up @@ -243,12 +245,14 @@ def build_elastic_actions(self, messages_chunk: messages.MessagesChunk):
_suid = _indexcard_rdf.indexcard.source_record_suid
if messages_chunk.message_type.is_backfill and _suid.has_forecompat_replacement():
continue # skip this one, let it get deleted
_remaining_indexcard_ids.discard(_indexcard_rdf.indexcard_id)
_index_action = self.build_index_action(
doc_id=_indexcard_rdf.indexcard.get_iri(),
doc_source=self._build_sourcedoc(_indexcard_rdf),
)
yield _indexcard_rdf.indexcard_id, _index_action
_sourcedoc = self._build_sourcedoc(_indexcard_rdf)
if _sourcedoc:
_index_action = self.build_index_action(
doc_id=_indexcard_rdf.indexcard.get_iri(),
doc_source=_sourcedoc,
)
_remaining_indexcard_ids.discard(_indexcard_rdf.indexcard_id)
yield _indexcard_rdf.indexcard_id, _index_action
# delete any that don't have "latest" rdf
_leftovers = trove_db.Indexcard.objects.filter(id__in=_remaining_indexcard_ids)
for _indexcard in _leftovers:
Expand Down

0 comments on commit ddffd6f

Please sign in to comment.