diff --git a/daras_ai_v2/azure_doc_extract.py b/daras_ai_v2/azure_doc_extract.py index 4afb67b4c..5b63cc18b 100644 --- a/daras_ai_v2/azure_doc_extract.py +++ b/daras_ai_v2/azure_doc_extract.py @@ -67,6 +67,8 @@ def azure_form_recognizer(url: str, model_id: str, params: dict = None): def extract_records(result: dict, page_num: int) -> list[dict]: + if not result: + return [] table_polys = extract_tables(result, page_num) records = [] for para in result["paragraphs"]: diff --git a/recipes/DocExtract.py b/recipes/DocExtract.py index 5561a3016..621b94b94 100644 --- a/recipes/DocExtract.py +++ b/recipes/DocExtract.py @@ -424,7 +424,11 @@ def process_source( params = dict(pages=str(page_num)) else: params = None - transcript = str(azure_doc_extract_pages(content_url, params=params)[0]) + pages = azure_doc_extract_pages(content_url, params=params) + if pages and pages[0]: + transcript = str(pages[0]) + else: + transcript = "" else: raise NotImplementedError( f"Unsupported type {doc_meta and doc_meta.mime_type} for {webpage_url}"