Skip to content

Commit

Permalink
Feature/add prompt tests and cleanup (#1523)
Browse files Browse the repository at this point in the history
* add extra parsers

* add prompt tests, cleanup

* add prompt tests, cleanup

* merge

* set mock console as default

* set mock console as default
  • Loading branch information
emrgnt-cmplxty authored Oct 29, 2024
1 parent 939f7d4 commit 746bfe4
Show file tree
Hide file tree
Showing 11 changed files with 308 additions and 19 deletions.
42 changes: 42 additions & 0 deletions .github/actions/run-sdk-prompt-management-tests/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Composite action that runs the SDK prompt-management integration tests.
# Each test is its own step so a failure is attributed to a single test name.
# Every step invokes tests/integration/runner_sdk.py from the ./py directory,
# passing the test name as the only argument.
name: 'Run SDK Prompt Management Tests'
description: 'Runs SDK prompt management tests for R2R'
runs:
  using: "composite"
  steps:
    # First run basic prompt operations
    # (`shell` must be set explicitly on every `run` step of a composite action.)
    - name: Add prompt test (SDK)
      working-directory: ./py
      shell: bash
      run: poetry run python tests/integration/runner_sdk.py test_add_prompt

    - name: Get prompt test (SDK)
      working-directory: ./py
      shell: bash
      run: poetry run python tests/integration/runner_sdk.py test_get_prompt

    - name: Get all prompts test (SDK)
      working-directory: ./py
      shell: bash
      run: poetry run python tests/integration/runner_sdk.py test_get_all_prompts

    - name: Update prompt test (SDK)
      working-directory: ./py
      shell: bash
      run: poetry run python tests/integration/runner_sdk.py test_update_prompt

    # Then run error handling and access control tests
    - name: Prompt error handling test (SDK)
      working-directory: ./py
      shell: bash
      run: poetry run python tests/integration/runner_sdk.py test_prompt_error_handling

    - name: Prompt access control test (SDK)
      working-directory: ./py
      shell: bash
      run: poetry run python tests/integration/runner_sdk.py test_prompt_access_control

    # Finally run deletion test
    - name: Delete prompt test (SDK)
      working-directory: ./py
      shell: bash
      run: poetry run python tests/integration/runner_sdk.py test_delete_prompt
6 changes: 6 additions & 0 deletions .github/workflows/r2r-full-py-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
- sdk-retrieval
- sdk-auth
- sdk-collections
- sdk-prompts
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
TELEMETRY_ENABLED: 'false'
Expand Down Expand Up @@ -82,3 +83,8 @@ jobs:
if: matrix.test_category == 'sdk-collections'
uses: ./.github/actions/run-sdk-collections-tests
continue-on-error: true

- name: Run SDK Prompt Tests
if: matrix.test_category == 'sdk-prompts'
uses: ./.github/actions/run-sdk-prompt-management-tests
continue-on-error: true
6 changes: 6 additions & 0 deletions .github/workflows/r2r-light-py-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
- sdk-retrieval
- sdk-auth
- sdk-collections
- sdk-prompts
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
TELEMETRY_ENABLED: 'false'
Expand Down Expand Up @@ -85,3 +86,8 @@ jobs:
if: matrix.test_category == 'sdk-collections'
uses: ./.github/actions/run-sdk-collections-tests
continue-on-error: true

- name: Run SDK Prompt Tests
if: matrix.test_category == 'sdk-prompts'
uses: ./.github/actions/run-sdk-prompt-management-tests
continue-on-error: true
2 changes: 1 addition & 1 deletion py/core/base/providers/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class EmailConfig(ProviderConfig):
smtp_username: Optional[str] = None
smtp_password: Optional[str] = None
from_email: Optional[str] = None
use_tls: bool = True
use_tls: Optional[bool] = True

@property
def supported_providers(self) -> list[str]:
Expand Down
10 changes: 5 additions & 5 deletions py/core/base/providers/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ class IngestionConfig(ProviderConfig):
)
extra_parsers: dict[str, str] = {}

audio_transcription_model: str
audio_transcription_model: str = "openai/whisper-1"

vision_img_prompt_name: Optional[str] = None
vision_img_model: str
vision_img_prompt_name: str = "vision_img"
# NOTE(review): the previous default "openai/gpt-4-mini" does not appear to be
# a published OpenAI model id; "gpt-4o-mini" is the small vision-capable model.
vision_img_model: str = "openai/gpt-4o-mini"

vision_pdf_prompt_name: Optional[str] = None
vision_pdf_model: str
vision_pdf_prompt_name: str = "vision_pdf"
# NOTE(review): the previous default "openai/gpt-4-mini" does not appear to be
# a published OpenAI model id; "gpt-4o-mini" is the small vision-capable model.
vision_pdf_model: str = "openai/gpt-4o-mini"

@property
def supported_providers(self) -> list[str]:
Expand Down
3 changes: 0 additions & 3 deletions py/core/parsers/media/img_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@
class ImageParser(AsyncParser[DataType]):
"""A parser for image data using vision models."""

DEFAULT_IMG_VISION_PROMPT_NAME = "vision_img"

def __init__(
self,
config: IngestionConfig,
Expand Down Expand Up @@ -56,7 +54,6 @@ async def ingest( # type: ignore
if not self.vision_prompt_text:
self.vision_prompt_text = await self.database_provider.get_prompt( # type: ignore
prompt_name=self.config.vision_img_prompt_name
or self.DEFAULT_IMG_VISION_PROMPT_NAME
)
try:
# Verify model supports vision
Expand Down
3 changes: 0 additions & 3 deletions py/core/parsers/media/pdf_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
class VLMPDFParser(AsyncParser[DataType]):
"""A parser for PDF documents using vision models for page processing."""

DEFAULT_PDF_VISION_PROMPT_NAME = "vision_pdf"

def __init__(
self,
config: IngestionConfig,
Expand Down Expand Up @@ -142,7 +140,6 @@ async def ingest(
if not self.vision_prompt_text:
self.vision_prompt_text = await self.database_provider.get_prompt( # type: ignore
prompt_name=self.config.vision_pdf_prompt_name
or self.DEFAULT_PDF_VISION_PROMPT_NAME
)

temp_dir = None
Expand Down
12 changes: 8 additions & 4 deletions py/r2r.toml
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,13 @@ batch_size = 256

[embedding]
provider = "litellm"
base_model = "openai/text-embedding-3-large"
base_dimension = 3072
quantization_settings = { quantization_type = "INT1" }
# RECOMMENDED - For advanced applications,
# use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization
# base_model = "openai/text-embedding-3-large"
# base_dimension = 3072
# quantization_settings = { quantization_type = "INT1" }
base_model = "openai/text-embedding-3-small"
base_dimension = 512
batch_size = 128
add_title_as_prefix = false
rerank_model = "None"
Expand Down Expand Up @@ -116,4 +120,4 @@ provider = "simple"
provider = "r2r"

[email]
provider = "smtp"
provider = "mock_console"
19 changes: 17 additions & 2 deletions py/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
BCryptConfig,
CompletionConfig,
DatabaseConfig,
EmailConfig,
EmbeddingConfig,
PersistentLoggingConfig,
SqlitePersistentLoggingProvider,
Expand All @@ -29,6 +30,7 @@
)
from core.providers import (
BCryptProvider,
ConsoleMockEmailProvider,
LiteLLMCompletionProvider,
LiteLLMEmbeddingProvider,
PostgresDBProvider,
Expand Down Expand Up @@ -169,12 +171,25 @@ def auth_config(app_config):
)


@pytest.fixture(scope="function")
def email_provider(app_config):
    """Return a ConsoleMockEmailProvider configured for the given app config.

    The provider is built from an EmailConfig whose provider name is
    "console_mock" and whose app settings come from the ``app_config`` fixture.
    """
    mock_email_config = EmailConfig(provider="console_mock", app=app_config)
    return ConsoleMockEmailProvider(mock_email_config)


@pytest.fixture(scope="function")
async def r2r_auth_provider(
auth_config, crypto_provider, temporary_postgres_db_provider
auth_config,
crypto_provider,
temporary_postgres_db_provider,
email_provider,
):
auth_provider = R2RAuthProvider(
auth_config, crypto_provider, temporary_postgres_db_provider
auth_config,
crypto_provider,
temporary_postgres_db_provider,
email_provider,
)
await auth_provider.initialize()
yield auth_provider
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider):
assert inserted_document.id == document_info.id
assert inserted_document.collection_ids == document_info.collection_ids
assert inserted_document.user_id == document_info.user_id
assert inserted_document.type == document_info.document_type
assert inserted_document.document_type == document_info.document_type
assert inserted_document.metadata == document_info.metadata
assert inserted_document.title == document_info.title
assert inserted_document.version == document_info.version
Expand Down
Loading

0 comments on commit 746bfe4

Please sign in to comment.