From d0ce6ced7aca57d4914d1bb0128cee85f9f15d10 Mon Sep 17 00:00:00 2001 From: Matias Brignone Date: Tue, 30 Jan 2024 17:09:24 -0300 Subject: [PATCH 1/3] feat: add command line option to avoid deleting the index when doing a refresh --- meilisync/main.py | 4 ++++ meilisync/meili.py | 50 +++++++++++++++++++++++++++------------------- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/meilisync/main.py b/meilisync/main.py index 8b772cc..a53330a 100644 --- a/meilisync/main.py +++ b/meilisync/main.py @@ -149,6 +149,9 @@ def refresh( size: int = typer.Option( 10000, "-s", "--size", help="Size of data for each insert to be inserted into MeiliSearch" ), + keep_index: bool = typer.Option( + False, "-d", "--keep-index", help="Flag to delete the existing index before doing the sync" + ), ): async def _(): settings = context.obj["settings"] @@ -162,6 +165,7 @@ async def _(): count = await meili.refresh_data( sync, source.get_full_data(sync, size), + keep_index, ) if count: logger.info( diff --git a/meilisync/meili.py b/meilisync/meili.py index acd9fda..cc4efb3 100644 --- a/meilisync/meili.py +++ b/meilisync/meili.py @@ -31,22 +31,27 @@ async def add_data(self, sync: Sync, data: list): events = [Event(type=EventType.create, data=item) for item in data] return await self.handle_events_by_type(sync, events, EventType.create) - async def refresh_data(self, sync: Sync, data: AsyncGenerator): + async def refresh_data(self, sync: Sync, data: AsyncGenerator, keep_index: bool = False): index = sync.index_name pk = sync.pk - sync.index = index_name_tmp = f"{index}_tmp" - try: - await self.client.index(index_name_tmp).delete() - except MeilisearchApiError as e: - if e.code != "MeilisearchApiError.index_not_found": - raise - settings = await self.client.index(index).get_settings() - index_tmp = await self.client.create_index(index_name_tmp, primary_key=pk) - task = await index_tmp.update_settings(settings) - logger.info(f"Waiting for update tmp index {index_name_tmp} settings to complete...") - await self.client.wait_for_task( - task_id=task.task_uid, timeout_in_ms=self.wait_for_task_timeout - ) + if not keep_index: + sync.index = index_name_tmp = f"{index}_tmp" + try: + await self.client.index(index_name_tmp).delete() + except MeilisearchApiError as e: + if e.code != "MeilisearchApiError.index_not_found": + raise + settings = await self.client.index(index).get_settings() + index_tmp = await self.client.create_index(index_name_tmp, primary_key=pk) + task = await index_tmp.update_settings(settings) + logger.info(f"Waiting for update tmp index {index_name_tmp} settings to complete...") + await self.client.wait_for_task( + task_id=task.task_uid, timeout_in_ms=self.wait_for_task_timeout + ) + else: + logger.info("Not deleting index when refreshing data") + index_name_tmp = index + tasks = [] count = 0 async for items in data: @@ -61,13 +66,16 @@ async def refresh_data(self, sync: Sync, data: AsyncGenerator): ] logger.info(f"Waiting for insert tmp index {index_name_tmp} to complete...") await asyncio.gather(*wait_tasks) - task = await self.client.swap_indexes([(index, index_name_tmp)]) - logger.info(f"Waiting for swap index {index} to complete...") - await self.client.wait_for_task( - task_id=task.task_uid, timeout_in_ms=self.wait_for_task_timeout - ) - await self.client.index(index_name_tmp).delete() - logger.success(f"Swap index {index} complete") + + if not keep_index: + task = await self.client.swap_indexes([(index, index_name_tmp)]) + logger.info(f"Waiting for swap index {index} to complete...") + await self.client.wait_for_task( + task_id=task.task_uid, timeout_in_ms=self.wait_for_task_timeout + ) + await self.client.index(index_name_tmp).delete() + logger.success(f"Swap index {index} complete") + return count async def get_count(self, index: str): From c66e5332a92b29cfec6ed91fc0fcabc1e9a190dd Mon Sep 17 00:00:00 2001 From: Matias Brignone Date: Tue, 30 Jan 2024 17:55:01 -0300 Subject: [PATCH 2/3] fix: include table info in the event when doing a full sync --- meilisync/meili.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisync/meili.py b/meilisync/meili.py index cc4efb3..60a2419 100644 --- a/meilisync/meili.py +++ b/meilisync/meili.py @@ -28,7 +28,7 @@ def __init__( self.wait_for_task_timeout = wait_for_task_timeout async def add_data(self, sync: Sync, data: list): - events = [Event(type=EventType.create, data=item) for item in data] + events = [Event(type=EventType.create, data=item, table=sync.table) for item in data] return await self.handle_events_by_type(sync, events, EventType.create) async def refresh_data(self, sync: Sync, data: AsyncGenerator, keep_index: bool = False): From 63a7d0fc5aac589b2c5e928dad65b5310555bd45 Mon Sep 17 00:00:00 2001 From: Matias Brignone Date: Wed, 31 Jan 2024 09:39:17 -0300 Subject: [PATCH 3/3] docs: update README --- README.md | 4 ++-- meilisync/main.py | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b5b1af7..7ed377b 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ created only once, otherwise, the plugin instance will be created for each event The progress is used to record the last sync position, such as binlog position for MySQL. -- `type`: `file` or `redis`, if set to file, another option `path` is required. +- `type`: `file` or `redis`, if set to file, the `path` option can be used to specify the path. - `path`: the file path to store the progress, default is `progress.json`. - `key`: the redis key to store the progress, default is `meilisync:progress`. - `dsn`: the redis dsn, default is `redis://localhost:6379/0`. @@ -204,7 +204,7 @@ The sync configuration, you can add multiple sync tasks. - `table`: the database table name or collection name. - `index`: the Meilisearch index name, if not set, it will use the table name. -- `full`: whether to do a full sync, default is `false`. +- `full`: whether to do a full sync, default is `false`. If the index already exists, the full sync won't take place. - `fields`: the fields to sync, if not set, it will sync all fields. The key is table field name, the value is the Meilisearch field name, if not set, it will use the table field name. - `plugins`: the table level plugins, optional. diff --git a/meilisync/main.py b/meilisync/main.py index a53330a..f08cc29 100644 --- a/meilisync/main.py +++ b/meilisync/main.py @@ -150,7 +150,10 @@ def refresh( 10000, "-s", "--size", help="Size of data for each insert to be inserted into MeiliSearch" ), keep_index: bool = typer.Option( - False, "-d", "--keep-index", help="Flag to delete the existing index before doing the sync" + False, + "-d", + "--keep-index", + help="Flag to avoid deleting the existing index before doing a full sync.", ), ): async def _():