From 1332c74d98363d87ff38824eb30b51408eb3cf41 Mon Sep 17 00:00:00 2001
From: Alan Konarski <alan.konarski@deepsense.ai>
Date: Mon, 23 Sep 2024 11:57:23 +0200
Subject: [PATCH] Move unstructured tests to integration tests folder

---
 .../tests/integration/test_unstructured.py    | 72 +++++++++++++++++++
 .../tests/unit/test_document_processor.py     | 40 -----------
 .../tests/unit/test_providers.py              | 32 ---------
 3 files changed, 72 insertions(+), 72 deletions(-)
 create mode 100644 packages/ragbits-document-search/tests/integration/test_unstructured.py

diff --git a/packages/ragbits-document-search/tests/integration/test_unstructured.py b/packages/ragbits-document-search/tests/integration/test_unstructured.py
new file mode 100644
index 00000000..a48c1f49
--- /dev/null
+++ b/packages/ragbits-document-search/tests/integration/test_unstructured.py
@@ -0,0 +1,72 @@
+from pathlib import Path
+
+import pytest
+
+from ragbits.document_search.documents.document import DocumentMeta, DocumentType
+from ragbits.document_search.ingestion.document_processor import DocumentProcessor
+from ragbits.document_search.ingestion.providers.unstructured import (
+    DEFAULT_PARTITION_KWARGS,
+    UNSTRUCTURED_API_KEY_ENV,
+    UNSTRUCTURED_API_URL_ENV,
+    UnstructuredProvider,
+)
+
+from ..helpers import env_vars_not_set
+
+
+@pytest.mark.skipif(
+    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
+    reason="Unstructured API environment variables not set",
+)
+async def test_document_processor_processes_text_document_with_unstructured_provider():
+    """A processor from from_config() routes TXT to UnstructuredProvider and returns one element."""
+    document_processor = DocumentProcessor.from_config()
+    document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")
+    elements = await document_processor.process(document_meta)
+    assert isinstance(document_processor._providers[DocumentType.TXT], UnstructuredProvider)
+    assert len(elements) == 1
+    # Same input as the direct UnstructuredProvider tests, which expect the trailing period to be kept.
+    assert elements[0].content == "Name of Peppa's brother is George."
+
+
+@pytest.mark.skipif(
+    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
+    reason="Unstructured API environment variables not set",
+)
+async def test_document_processor_processes_md_document_with_unstructured_provider():
+    """Process the README.md located five directories above this file and check its first element."""
+    processor = DocumentProcessor.from_config()
+    # parents[4] is the fifth ancestor directory, i.e. the same as five chained `.parent` accesses.
+    readme_path = Path(__file__).parents[4] / "README.md"
+    parsed = await processor.process(DocumentMeta.from_local_path(readme_path))
+    assert len(parsed) > 0
+    assert parsed[0].content == "Ragbits"
+
+
+@pytest.mark.skipif(
+    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
+    reason="Unstructured API environment variables not set",
+)
+async def test_unstructured_provider_document_with_default_partition_kwargs():
+    """A provider built without arguments exposes DEFAULT_PARTITION_KWARGS and returns one element."""
+    text_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")
+    provider = UnstructuredProvider()
+    result = await provider.process(text_meta)
+    assert provider.partition_kwargs == DEFAULT_PARTITION_KWARGS
+    assert len(result) == 1
+    assert result[0].content == "Name of Peppa's brother is George."
+
+
+@pytest.mark.skipif(
+    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
+    reason="Unstructured API environment variables not set",
+)
+async def test_unstructured_provider_document_with_custom_partition_kwargs():
+    """Caller-supplied partition kwargs are exposed unchanged on the provider after processing."""
+    custom_kwargs = {"languages": ["pl"], "strategy": "fast"}
+    text_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")
+    provider = UnstructuredProvider(partition_kwargs=custom_kwargs)
+    result = await provider.process(text_meta)
+    assert provider.partition_kwargs == custom_kwargs
+    assert len(result) == 1
+    assert result[0].content == "Name of Peppa's brother is George."
diff --git a/packages/ragbits-document-search/tests/unit/test_document_processor.py b/packages/ragbits-document-search/tests/unit/test_document_processor.py
index 0e930ff1..f329e8b3 100644
--- a/packages/ragbits-document-search/tests/unit/test_document_processor.py
+++ b/packages/ragbits-document-search/tests/unit/test_document_processor.py
@@ -1,17 +1,6 @@
-from pathlib import Path
-
-import pytest
-
 from ragbits.document_search.documents.document import DocumentMeta, DocumentType
 from ragbits.document_search.ingestion.document_processor import DocumentProcessor
 from ragbits.document_search.ingestion.providers.dummy import DummyProvider
-from ragbits.document_search.ingestion.providers.unstructured import (
-    UNSTRUCTURED_API_KEY_ENV,
-    UNSTRUCTURED_API_URL_ENV,
-    UnstructuredProvider,
-)
-
-from ..helpers import env_vars_not_set
 
 
 async def test_document_processor_processes_text_document_with_dummy_provider():
@@ -24,32 +13,3 @@ async def test_document_processor_processes_text_document_with_dummy_provider():
     assert isinstance(document_processor._providers[DocumentType.TXT], DummyProvider)
     assert len(elements) == 1
     assert elements[0].content == "Name of Peppa's brother is George"
-
-
-@pytest.mark.skipif(
-    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
-    reason="Unstructured API environment variables not set",
-)
-async def test_document_processor_processes_text_document_with_unstructured_provider():
-    document_processor = DocumentProcessor.from_config()
-    document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")
-
-    elements = await document_processor.process(document_meta)
-
-    assert isinstance(document_processor._providers[DocumentType.TXT], UnstructuredProvider)
-    assert len(elements) == 1
-    assert elements[0].content == "Name of Peppa's brother is George"
-
-
-@pytest.mark.skipif(
-    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
-    reason="Unstructured API environment variables not set",
-)
-async def test_document_processor_processes_md_document_with_unstructured_provider():
-    document_processor = DocumentProcessor.from_config()
-    document_meta = DocumentMeta.from_local_path(Path(__file__).parent.parent.parent.parent.parent / "README.md")
-
-    elements = await document_processor.process(document_meta)
-
-    assert len(elements) > 0
-    assert elements[0].content == "Ragbits"
diff --git a/packages/ragbits-document-search/tests/unit/test_providers.py b/packages/ragbits-document-search/tests/unit/test_providers.py
index 7da2570c..5bde8e52 100644
--- a/packages/ragbits-document-search/tests/unit/test_providers.py
+++ b/packages/ragbits-document-search/tests/unit/test_providers.py
@@ -4,14 +4,11 @@
 from ragbits.document_search.documents.document import DocumentMeta, DocumentType
 from ragbits.document_search.ingestion.providers.base import DocumentTypeNotSupportedError
 from ragbits.document_search.ingestion.providers.unstructured import (
-    DEFAULT_PARTITION_KWARGS,
     UNSTRUCTURED_API_KEY_ENV,
     UNSTRUCTURED_API_URL_ENV,
     UnstructuredProvider,
 )
 
-from ..helpers import env_vars_not_set
-
 load_dotenv()
 
 
@@ -44,32 +41,3 @@ async def test_unstructured_provider_raises_value_error_when_api_url_not_set(mon
         )
 
     assert f"{UNSTRUCTURED_API_URL_ENV} environment variable is not set" in str(err.value)
-
-
-@pytest.mark.skipif(
-    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
-    reason="Unstructured API environment variables not set",
-)
-async def test_unstructured_provider_document_with_default_partition_kwargs():
-    document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")
-    unstructured_provider = UnstructuredProvider()
-    elements = await unstructured_provider.process(document_meta)
-
-    assert unstructured_provider.partition_kwargs == DEFAULT_PARTITION_KWARGS
-    assert len(elements) == 1
-    assert elements[0].content == "Name of Peppa's brother is George."
-
-
-@pytest.mark.skipif(
-    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
-    reason="Unstructured API environment variables not set",
-)
-async def test_unstructured_provider_document_with_custom_partition_kwargs():
-    document_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")
-    partition_kwargs = {"languages": ["pl"], "strategy": "fast"}
-    unstructured_provider = UnstructuredProvider(partition_kwargs=partition_kwargs)
-    elements = await unstructured_provider.process(document_meta)
-
-    assert unstructured_provider.partition_kwargs == partition_kwargs
-    assert len(elements) == 1
-    assert elements[0].content == "Name of Peppa's brother is George."