def dump_file_callback(key, row, time, is_addition):
    """Keep the local dump folder in sync with a single source-file change.

    On an addition the file's bytes are written into ``FILE_DUMP_FOLDER``;
    on a removal the previously dumped copy is deleted. Removal is
    best-effort: a failure is logged and never raised to the caller.

    Args:
        key: Row identifier supplied by the caller; not used here.
        row: Mapping with a ``"path"`` entry (an object whose ``.value`` is
            the source path string) and a ``"data"`` entry (the file's
            bytes, written verbatim in binary mode).
        time: Timestamp supplied by the caller; not used here.
        is_addition: ``True`` when the file was added to the source,
            ``False`` when it was removed from it.
    """
    # Only the last path component is kept, so every dumped file lands flat
    # inside FILE_DUMP_FOLDER regardless of its directory in the source.
    file_name = row["path"].value.split("/")[-1]
    file_path = f"{FILE_DUMP_FOLDER}/{file_name}"

    if is_addition:
        # save parsed files
        with open(file_path, "wb") as f:
            f.write(row["data"])
        return

    # NOTE(review): if the source reports a modified file as a removal plus an
    # addition of the same path, this branch must run first or the fresh copy
    # is deleted — confirm the delivery order with the caller.
    try:
        os.remove(file_path)
    except FileNotFoundError:
        # The dumped copy is already gone, which is the desired end state.
        logging.info("File already absent, nothing to remove: %s", file_path)
    except OSError as e:
        # Real failures (permissions, path is a directory, ...) deserve more
        # than info level, but must not break the calling pipeline.
        logging.warning("Error removing %s: %s", file_path, e)
    else:
        logging.info("Removed file: %s", file_path)