Skip to content

Commit

Permalink
Move unstructured tests to integration tests folder
Browse files Browse the repository at this point in the history
  • Loading branch information
akonarski-ds committed Sep 23, 2024
1 parent 51629a3 commit 1332c74
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 72 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from pathlib import Path

import pytest

from ragbits.document_search.documents.document import DocumentMeta, DocumentType
from ragbits.document_search.ingestion.document_processor import DocumentProcessor
from ragbits.document_search.ingestion.providers.unstructured import (
DEFAULT_PARTITION_KWARGS,
UNSTRUCTURED_API_KEY_ENV,
UNSTRUCTURED_API_URL_ENV,
UnstructuredProvider,
)

from ..helpers import env_vars_not_set

Check failure on line 14 in packages/ragbits-document-search/tests/integration/test_unstructured.py

View workflow job for this annotation

GitHub Actions / JUnit Test Report

test_unstructured.packages.ragbits-document-search.tests.integration.test_unstructured

collection failure
Raw output
ImportError while importing test module '/home/runner/work/ragbits/ragbits/packages/ragbits-document-search/tests/integration/test_unstructured.py'.
Hint: make sure your test modules/packages have valid Python names.
Traceback:
/opt/hostedtoolcache/Python/3.10.15/x64/lib/python3.10/importlib/__init__.py:126: in import_module
    return _bootstrap._gcd_import(name[level:], package, level)
packages/ragbits-document-search/tests/integration/test_unstructured.py:14: in <module>
    from ..helpers import env_vars_not_set
E   ImportError: attempted relative import with no known parent package


@pytest.mark.skipif(
    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
    reason="Unstructured API environment variables not set",
)
async def test_document_processor_processes_text_document_with_unstructured_provider():
    """Run a literal text document through a processor built by ``DocumentProcessor.from_config()``.

    Checks that the provider registered for ``DocumentType.TXT`` is an
    ``UnstructuredProvider`` and that exactly one element comes back.
    Skipped when ``env_vars_not_set`` is truthy for the two Unstructured
    environment variable names.
    """
    processor = DocumentProcessor.from_config()
    text_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")

    parsed = await processor.process(text_meta)

    txt_provider = processor._providers[DocumentType.TXT]
    assert isinstance(txt_provider, UnstructuredProvider)
    assert len(parsed) == 1
    # NOTE(review): the input literal ends with a period but the expected
    # content here does not, whereas the provider-level tests that feed the
    # same literal expect the period to be kept — confirm which is right.
    first_element = parsed[0]
    assert first_element.content == "Name of Peppa's brother is George"


@pytest.mark.skipif(
    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
    reason="Unstructured API environment variables not set",
)
async def test_document_processor_processes_md_document_with_unstructured_provider():
    """Process the ``README.md`` found five directories above this test file.

    Expects at least one element, with the first element's content equal
    to ``"Ragbits"``. Skipped when ``env_vars_not_set`` is truthy for the
    two Unstructured environment variable names.
    """
    # Five ``.parent`` hops from this file; per the CI path shown for this
    # module that lands on the repository checkout directory.
    base_dir = Path(__file__).parent.parent.parent.parent.parent
    readme_meta = DocumentMeta.from_local_path(base_dir / "README.md")
    processor = DocumentProcessor.from_config()

    parsed = await processor.process(readme_meta)

    assert len(parsed) > 0
    first_element = parsed[0]
    assert first_element.content == "Ragbits"


@pytest.mark.skipif(
    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
    reason="Unstructured API environment variables not set",
)
async def test_unstructured_provider_document_with_default_partition_kwargs():
    """Process a literal text document with an ``UnstructuredProvider`` built without arguments.

    Verifies the provider's ``partition_kwargs`` equal
    ``DEFAULT_PARTITION_KWARGS`` and that the single returned element
    carries the input sentence unchanged.
    """
    text_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")
    provider = UnstructuredProvider()

    parsed = await provider.process(text_meta)

    assert provider.partition_kwargs == DEFAULT_PARTITION_KWARGS
    assert len(parsed) == 1
    first_element = parsed[0]
    assert first_element.content == "Name of Peppa's brother is George."


@pytest.mark.skipif(
    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
    reason="Unstructured API environment variables not set",
)
async def test_unstructured_provider_document_with_custom_partition_kwargs():
    """Process a literal text document with caller-supplied partition kwargs.

    Verifies the provider exposes the same kwargs it was constructed with
    and that the single returned element carries the input sentence
    unchanged.
    """
    text_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")
    custom_kwargs = {"languages": ["pl"], "strategy": "fast"}
    provider = UnstructuredProvider(partition_kwargs=custom_kwargs)

    parsed = await provider.process(text_meta)

    assert provider.partition_kwargs == custom_kwargs
    assert len(parsed) == 1
    first_element = parsed[0]
    assert first_element.content == "Name of Peppa's brother is George."
Original file line number Diff line number Diff line change
@@ -1,17 +1,6 @@
from pathlib import Path

import pytest

from ragbits.document_search.documents.document import DocumentMeta, DocumentType
from ragbits.document_search.ingestion.document_processor import DocumentProcessor
from ragbits.document_search.ingestion.providers.dummy import DummyProvider
from ragbits.document_search.ingestion.providers.unstructured import (
UNSTRUCTURED_API_KEY_ENV,
UNSTRUCTURED_API_URL_ENV,
UnstructuredProvider,
)

from ..helpers import env_vars_not_set


async def test_document_processor_processes_text_document_with_dummy_provider():
Expand All @@ -24,32 +13,3 @@ async def test_document_processor_processes_text_document_with_dummy_provider():
assert isinstance(document_processor._providers[DocumentType.TXT], DummyProvider)
assert len(elements) == 1
assert elements[0].content == "Name of Peppa's brother is George"


@pytest.mark.skipif(
    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
    reason="Unstructured API environment variables not set",
)
async def test_document_processor_processes_text_document_with_unstructured_provider():
    """Run a literal text document through a processor built by ``DocumentProcessor.from_config()``.

    Checks that the provider registered for ``DocumentType.TXT`` is an
    ``UnstructuredProvider`` and that exactly one element comes back.
    Skipped when ``env_vars_not_set`` is truthy for the two Unstructured
    environment variable names.
    """
    processor = DocumentProcessor.from_config()
    text_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")

    parsed = await processor.process(text_meta)

    txt_provider = processor._providers[DocumentType.TXT]
    assert isinstance(txt_provider, UnstructuredProvider)
    assert len(parsed) == 1
    # NOTE(review): the input literal ends with a period but the expected
    # content here does not — confirm against what the provider returns.
    first_element = parsed[0]
    assert first_element.content == "Name of Peppa's brother is George"


@pytest.mark.skipif(
    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
    reason="Unstructured API environment variables not set",
)
async def test_document_processor_processes_md_document_with_unstructured_provider():
    """Process the ``README.md`` found five directories above this test file.

    Expects at least one element, with the first element's content equal
    to ``"Ragbits"``. Skipped when ``env_vars_not_set`` is truthy for the
    two Unstructured environment variable names.
    """
    # Five ``.parent`` hops from this file — presumably the repository
    # root; verify against this module's location.
    base_dir = Path(__file__).parent.parent.parent.parent.parent
    readme_meta = DocumentMeta.from_local_path(base_dir / "README.md")
    processor = DocumentProcessor.from_config()

    parsed = await processor.process(readme_meta)

    assert len(parsed) > 0
    first_element = parsed[0]
    assert first_element.content == "Ragbits"
32 changes: 0 additions & 32 deletions packages/ragbits-document-search/tests/unit/test_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,11 @@
from ragbits.document_search.documents.document import DocumentMeta, DocumentType
from ragbits.document_search.ingestion.providers.base import DocumentTypeNotSupportedError
from ragbits.document_search.ingestion.providers.unstructured import (
DEFAULT_PARTITION_KWARGS,
UNSTRUCTURED_API_KEY_ENV,
UNSTRUCTURED_API_URL_ENV,
UnstructuredProvider,
)

from ..helpers import env_vars_not_set

load_dotenv()


Expand Down Expand Up @@ -44,32 +41,3 @@ async def test_unstructured_provider_raises_value_error_when_api_url_not_set(mon
)

assert f"{UNSTRUCTURED_API_URL_ENV} environment variable is not set" in str(err.value)


@pytest.mark.skipif(
    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
    reason="Unstructured API environment variables not set",
)
async def test_unstructured_provider_document_with_default_partition_kwargs():
    """Process a literal text document with an ``UnstructuredProvider`` built without arguments.

    Verifies the provider's ``partition_kwargs`` equal
    ``DEFAULT_PARTITION_KWARGS`` and that the single returned element
    carries the input sentence unchanged.
    """
    text_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")
    provider = UnstructuredProvider()

    parsed = await provider.process(text_meta)

    assert provider.partition_kwargs == DEFAULT_PARTITION_KWARGS
    assert len(parsed) == 1
    first_element = parsed[0]
    assert first_element.content == "Name of Peppa's brother is George."


@pytest.mark.skipif(
    env_vars_not_set([UNSTRUCTURED_API_URL_ENV, UNSTRUCTURED_API_KEY_ENV]),
    reason="Unstructured API environment variables not set",
)
async def test_unstructured_provider_document_with_custom_partition_kwargs():
    """Process a literal text document with caller-supplied partition kwargs.

    Verifies the provider exposes the same kwargs it was constructed with
    and that the single returned element carries the input sentence
    unchanged.
    """
    text_meta = DocumentMeta.create_text_document_from_literal("Name of Peppa's brother is George.")
    custom_kwargs = {"languages": ["pl"], "strategy": "fast"}
    provider = UnstructuredProvider(partition_kwargs=custom_kwargs)

    parsed = await provider.process(text_meta)

    assert provider.partition_kwargs == custom_kwargs
    assert len(parsed) == 1
    first_element = parsed[0]
    assert first_element.content == "Name of Peppa's brother is George."

0 comments on commit 1332c74

Please sign in to comment.