Skip to content

Commit

Permalink
backend: omit incomplete translations from typesense
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinstadler committed Jan 13, 2025
1 parent 08b25b4 commit 4d0b80a
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions scripts/3_to_typesense.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ def del_empty_strings(o, field_names):
for t in translations:
t["work"] = works[t["work"] - 1]
t["translators"] = [translators[t_id - 1] for t_id in t["translators"]]
+ if "MISSING" in t["title"]:
+ t["title"] = "???"
# work around https://typesense.org/docs/guide/tips-for-searching-common-types-of-data.html#searching-for-null-or-empty-values
# for the /translators page
t["has_translators"] = len(t["translators"]) != 0
Expand Down Expand Up @@ -194,7 +196,11 @@ def del_empty_strings(o, field_names):
if not w["short_title"]:
w["short_title"] = w["title"]

- pub["contains"] = [translations[t_id - 1] for t_id in pub["contains"]]
+ pub["contains"] = [
+ translations[t_id - 1]
+ for t_id in pub["contains"]
+ if "MISSING" not in translations[t_id - 1]["work"]["title"]
+ ]
pub["language"] = languages[pub["language"]]

pub["images"] = (
Expand Down Expand Up @@ -252,10 +258,10 @@ def del_empty_strings(o, field_names):


if r["num_documents"] > 0:
- logging.info(f'Clearing {r["num_documents"]} existing documents')
+ logging.info(f"Clearing {r['num_documents']} existing documents")
r = client.collections[collection_name].documents.delete({"filter_by": 'id :!= ""'})
logging.info(
- f'Cleared {r["num_deleted"]} documents from collection {collection_name}'
+ f"Cleared {r['num_deleted']} documents from collection {collection_name}"
)

logging.info(f"importing {len(publications)} documents")
Expand Down

0 comments on commit 4d0b80a

Please sign in to comment.