From 99a284786135cda285b8ee4ee501cb0c6a2ba758 Mon Sep 17 00:00:00 2001
From: Corentin Meyer
Date: Mon, 22 Jan 2024 16:39:41 +0100
Subject: [PATCH] Folder of files test

---
Reviewer notes (free-form text in this area is not part of the commit):

* Adds an integration test that calls UnstructuredFileConverter.run() with
  `paths=[samples_path]` and a two-element `meta` list, using the
  "one-doc-per-element" document creation mode against a locally served
  API at http://localhost:8000/general/v0/general.
* NOTE(review): `paths` has one entry while `meta` has two. Presumably
  `samples_path` is a directory that gets expanded to its files (per the
  test name) -- confirm the converter accepts a per-file `meta` list in
  that case and how list entries are matched to the discovered files.
* The only value checked, `common_meta == "common"`, is present in both
  meta dicts, so the assertions do not show which dict was attached to
  which document.

 .../unstructured/tests/test_converter.py      | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/integrations/unstructured/tests/test_converter.py b/integrations/unstructured/tests/test_converter.py
index 7ce1069bb..2a9c332f9 100644
--- a/integrations/unstructured/tests/test_converter.py
+++ b/integrations/unstructured/tests/test_converter.py
@@ -169,3 +169,22 @@ def test_run_one_doc_per_element_with_meta_list_two_files(self, samples_path):
             assert "category" in doc.meta
             assert "common_meta" in doc.meta
             assert doc.meta["common_meta"] == "common"
+
+    @pytest.mark.integration
+    def test_run_one_doc_per_element_with_meta_list_folder(self, samples_path):
+        pdf_path = [samples_path]
+        meta = [{"custom_meta": "foobar", "common_meta": "common"}, {"other_meta": "barfoo", "common_meta": "common"}]
+        local_converter = UnstructuredFileConverter(
+            api_url="http://localhost:8000/general/v0/general", document_creation_mode="one-doc-per-element"
+        )
+
+        documents = local_converter.run(paths=pdf_path, meta=meta)["documents"]
+
+        assert len(documents) > 4
+        for doc in documents:
+            assert "name" in doc.meta
+            assert "page_number" in doc.meta
+            # elements have a category attribute that is saved in the document meta
+            assert "category" in doc.meta
+            assert "common_meta" in doc.meta
+            assert doc.meta["common_meta"] == "common"