Skip to content

Commit

Permalink
Merge pull request #12 from sanders41/indexing-performance
Browse files Browse the repository at this point in the history
Improve indexing performance
  • Loading branch information
prrao87 authored Apr 18, 2023
2 parents d792ca0 + 36a0436 commit 9449c47
Showing 1 changed file with 21 additions and 9 deletions.
30 changes: 21 additions & 9 deletions dbs/meilisearch/scripts/bulk_index.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import argparse
import asyncio
import glob
Expand All @@ -11,6 +13,7 @@

from dotenv import load_dotenv
from meilisearch_python_async import Client
from meilisearch_python_async.index import Index
from pydantic.main import ModelMetaclass

sys.path.insert(1, os.path.realpath(Path(__file__).resolve().parents[1]))
Expand Down Expand Up @@ -142,24 +145,33 @@ async def _update_sortable_attributes(
await index.update_sortable_attributes(fields)


async def do_indexing(index: Index, data: list[JsonBlob], file_name: str) -> None:
    """Send one file's documents to ``index`` and report it on stdout.

    Args:
        index: Index object whose ``update_documents`` coroutine is awaited.
        data: Documents to send; passed through unchanged.
        file_name: Path of the source file; only its final path component
            is shown in the progress message.
    """
    # "id" is passed as the second argument so it is used as the primary key.
    await index.update_documents(data, "id")
    short_name = Path(file_name).name
    print(f"Indexed {short_name} to db")


async def main(files: list[str]) -> None:
# Applies the "wines" index attribute settings, then indexes the JSONL files in `files`.
# NOTE(review): this text is a commit-diff rendering with no +/- markers and no
# indentation, so lines of the pre- and post-change main() appear interleaved below;
# confirm against the raw diff before treating it as runnable code.
settings = Settings()
# The server address is assembled from settings.meili_url and settings.meili_port.
URI = f"http://{settings.meili_url}:{settings.meili_port}"
MASTER_KEY = settings.meili_master_key
async with Client(URI, MASTER_KEY) as client:
# Await the three attribute-update coroutines for "wines" together via asyncio.gather.
await asyncio.gather(
_update_searchable_attributes(client, "wines"),
_update_filterable_attributes(client, "wines"),
_update_sortable_attributes(client, "wines"),
)
index = client.index("wines")
# Collects the do_indexing() coroutines so they can be awaited together further down.
tasks = []
print("Processing files")
for file in files:
# Read the file's records, then pass them through validate() with the Wine model
# (presumably schema validation; exclude_none=True semantics are defined in validate — verify).
data = read_jsonl_from_file(file)
data = validate(data, Wine, exclude_none=True)
# NOTE(review): the try/except below and the three sequential _update_* awaits after it
# look like the removed (pre-change) lines — the page header reports 9 deletions — verify.
try:
# Set id as primary key prior to indexing
await index.update_documents(data, "id")
print(f"Indexed {Path(file).name} to db")
except Exception as e:
print(f"{e}: Failed to index {Path(file).name} to db")
await _update_searchable_attributes(client, "wines")
await _update_filterable_attributes(client, "wines")
await _update_sortable_attributes(client, "wines")
# NOTE(review): presumably the added (post-change) lines resume here: each file's
# indexing coroutine is queued instead of being awaited inside the loop — verify.
tasks.append(do_indexing(index, data, file))
try:
# Set id as primary key prior to indexing
await asyncio.gather(*tasks)
except Exception as e:
# Exceptions caught here are printed, not re-raised.
print(f"{e}: Error while indexing to db")
print(f"Finished indexing {len(files)} JSONL files to db")


Expand Down

0 comments on commit 9449c47

Please sign in to comment.