From 977edf0f40ceaf7f9b6a494a2796a6f92df4a591 Mon Sep 17 00:00:00 2001 From: emrgnt-cmplxty <68796651+emrgnt-cmplxty@users.noreply.github.com> Date: Tue, 29 Oct 2024 18:51:36 -0700 Subject: [PATCH] Feature/add poppler check and fallback (#1529) * fix actions * fallback --- py/core/providers/ingestion/r2r/base.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/py/core/providers/ingestion/r2r/base.py b/py/core/providers/ingestion/r2r/base.py index 98c064301..9068c8573 100644 --- a/py/core/providers/ingestion/r2r/base.py +++ b/py/core/providers/ingestion/r2r/base.py @@ -211,7 +211,13 @@ def check_vlm(model_name: str) -> bool: or self.config.vision_pdf_model ) - if document.document_type == DocumentType.PDF and is_not_vlm: + has_not_poppler = not bool( + shutil.which("pdftoppm") + ) # Check if poppler is installed + + if document.document_type == DocumentType.PDF and ( + is_not_vlm or has_not_poppler + ): logger.info( f"Reverting to basic PDF parser as the provided is not a proper VLM model." )