From 1dd77942a91c9315141ca38e4d337acdbdb80077 Mon Sep 17 00:00:00 2001 From: Austin Walker Date: Fri, 28 Jun 2024 10:21:53 -0400 Subject: [PATCH] chore: Bump the default split page concurrency (#122) Verified that this shows a speedup by doing a local pip install and running the following snippet before and after the change: ``` import time from unstructured_client import UnstructuredClient from unstructured_client.models import shared s = UnstructuredClient( server_url=SERVER_URL, api_key_auth=API_KEY, ) filename = "../_sample_docs/layout-parser-paper.pdf" with open(filename, "rb") as f: # Note that this currently only supports a single file files=shared.Files( content=f.read(), file_name=filename, ) req = shared.PartitionParameters( files=files, strategy="hi_res", ) start_time = time.time() resp = s.general.partition(req) end_time = time.time() print(f"Elapsed time: {end_time - start_time} seconds") ``` --- src/unstructured_client/_hooks/custom/split_pdf_hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/unstructured_client/_hooks/custom/split_pdf_hook.py b/src/unstructured_client/_hooks/custom/split_pdf_hook.py index fe988069..a983aa91 100644 --- a/src/unstructured_client/_hooks/custom/split_pdf_hook.py +++ b/src/unstructured_client/_hooks/custom/split_pdf_hook.py @@ -36,7 +36,7 @@ DEFAULT_STARTING_PAGE_NUMBER = 1 -DEFAULT_CONCURRENCY_LEVEL = 5 +DEFAULT_CONCURRENCY_LEVEL = 8 MAX_CONCURRENCY_LEVEL = 15 MIN_PAGES_PER_SPLIT = 2 MAX_PAGES_PER_SPLIT = 20