From 3410cd33c9f6c6bf5972b0db6027b0e9f0f041db Mon Sep 17 00:00:00 2001 From: igiloh-pinecone <118673156+igiloh-pinecone@users.noreply.github.com> Date: Thu, 18 Jan 2024 11:07:41 +0200 Subject: [PATCH] Parameterize [server, pod] and [namespace] across all tests (#250) * [test] run E2E test on both serverless and pod Since we had a single index_name fixture - both tests were trying to create an index with the same name simulteneously * [test] Parameterize tests over [serverless, pod] I've created a centralized conftest.py across all tests (e2e and system) with parametrized fixtures - index_params and namespace. Now all tests can simply depend on these fixtures to be parameterized * Make linter happy * Fix cleanup indexes --------- Co-authored-by: Izel Levy --- src/canopy/knowledge_base/knowledge_base.py | 4 +- tests/conftest.py | 20 ++++++++++ tests/e2e/test_app.py | 26 +++---------- .../knowledge_base/test_knowledge_base.py | 12 +++--- tests/unit/chat_engine/test_chat_engine.py | 7 ---- .../context_engine/test_context_engine.py | 6 --- tests/util.py | 38 +++++++++---------- 7 files changed, 50 insertions(+), 63 deletions(-) create mode 100644 tests/conftest.py diff --git a/src/canopy/knowledge_base/knowledge_base.py b/src/canopy/knowledge_base/knowledge_base.py index 4f6dab4c..2da6239c 100644 --- a/src/canopy/knowledge_base/knowledge_base.py +++ b/src/canopy/knowledge_base/knowledge_base.py @@ -22,8 +22,8 @@ from canopy.models.data_models import Query, Document INDEX_NAME_PREFIX = "canopy--" -TIMEOUT_INDEX_CREATE = 30 -TIMEOUT_INDEX_PROVISION = 300 +TIMEOUT_INDEX_CREATE = 90 +TIMEOUT_INDEX_PROVISION = 120 INDEX_PROVISION_TIME_INTERVAL = 3 RESERVED_METADATA_KEYS = {"document_id", "text", "source"} diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..091c5bff --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,20 @@ +import pytest + +TEST_NAMESPACE = "ns" +TEST_CREATE_INDEX_PARAMS = [ + {"spec": {"serverless": {"cloud": "aws", "region": "us-west-2"}}}, + {"spec": {"pod": {"environment": "eu-west1-gcp", "pod_type": "p1.x1"}}}, +] + + +@pytest.fixture(scope="module", params=[None, TEST_NAMESPACE]) +def namespace(request): + return request.param + + +@pytest.fixture(scope="module", + params=TEST_CREATE_INDEX_PARAMS, + # The first key in the spec is the index type ("serverless" \ "pod") + ids=[next(iter(_["spec"])) for _ in TEST_CREATE_INDEX_PARAMS]) +def create_index_params(request): + return request.param diff --git a/tests/e2e/test_app.py b/tests/e2e/test_app.py index 990bf64a..6ffc39e9 100644 --- a/tests/e2e/test_app.py +++ b/tests/e2e/test_app.py @@ -5,7 +5,7 @@ import pytest from fastapi.testclient import TestClient -from tenacity import retry, stop_after_attempt, wait_fixed, wait_random +from tenacity import retry, stop_after_attempt, wait_fixed from canopy.knowledge_base import KnowledgeBase from canopy.knowledge_base.knowledge_base import list_canopy_indexes @@ -16,7 +16,7 @@ ContextUpsertRequest, ContextQueryRequest) from .. import Tokenizer -from ..util import create_e2e_tests_index_name, TEST_NAMESPACE, TEST_CREATE_INDEX_PARAMS +from ..util import create_e2e_tests_index_name upsert_payload = ContextUpsertRequest( documents=[ @@ -30,29 +30,19 @@ ) -@pytest.fixture(scope="module", params=[None, TEST_NAMESPACE]) -def namespace(request): - return request.param - - -@pytest.fixture(scope="module", params=TEST_CREATE_INDEX_PARAMS) -def create_index_params(request): - return request.param - - @pytest.fixture(scope="module") def namespace_prefix(namespace): return f"{namespace}/" if namespace is not None else "" -@retry(reraise=True, stop=stop_after_attempt(5), wait=wait_random(min=10, max=20)) def try_create_canopy_index(kb: KnowledgeBase, init_params: Dict[str, Any]): kb.create_canopy_index(**init_params) @pytest.fixture(scope="module") -def index_name(testrun_uid: str): - return create_e2e_tests_index_name(testrun_uid) +def index_name(testrun_uid: str, create_index_params): + index_type = next(iter(create_index_params["spec"])) + return create_e2e_tests_index_name(testrun_uid, index_type) @retry(reraise=True, stop=stop_after_attempt(60), wait=wait_fixed(1)) @@ -74,12 +64,6 @@ def assert_vector_ids_not_exist(vector_ids: List[str], @pytest.fixture(scope="module", autouse=True) def knowledge_base(index_name, create_index_params): kb = KnowledgeBase(index_name=index_name) - - # System and E2E tests are running in parallel and try to create - # indexes at the same time. - # DB raises an exception when we create two indexes at the same time. - # So we need to retry for now in order to overcome this. - # TODO: Remove the retries after the DB is fixed. try_create_canopy_index(kb, create_index_params) return kb diff --git a/tests/system/knowledge_base/test_knowledge_base.py b/tests/system/knowledge_base/test_knowledge_base.py index c4a1dfa5..56a85da2 100644 --- a/tests/system/knowledge_base/test_knowledge_base.py +++ b/tests/system/knowledge_base/test_knowledge_base.py @@ -1,4 +1,5 @@ import random +from typing import Dict, Any import pytest import numpy as np @@ -7,7 +8,7 @@ retry, stop_after_delay, wait_fixed, - wait_chain, stop_after_attempt, wait_random, + wait_chain, ) from canopy.knowledge_base import KnowledgeBase @@ -59,13 +60,12 @@ def encoder(): StubDenseEncoder()) -@retry(reraise=True, stop=stop_after_attempt(5), wait=wait_random(min=10, max=20)) -def try_create_canopy_index(kb: KnowledgeBase): - kb.create_canopy_index() +def try_create_canopy_index(kb: KnowledgeBase, init_params: Dict[str, Any]): + kb.create_canopy_index(**init_params) @pytest.fixture(scope="module", autouse=True) -def knowledge_base(index_full_name, index_name, chunker, encoder): +def knowledge_base(index_full_name, index_name, chunker, encoder, create_index_params): kb = KnowledgeBase(index_name=index_name, record_encoder=encoder, chunker=chunker) @@ -73,7 +73,7 @@ def knowledge_base(index_full_name, index_name, chunker, encoder): if index_full_name in list_canopy_indexes(): _get_global_client().delete_index(index_full_name) - try_create_canopy_index(kb) + try_create_canopy_index(kb, create_index_params) return kb diff --git a/tests/unit/chat_engine/test_chat_engine.py b/tests/unit/chat_engine/test_chat_engine.py index d4091bd5..97628019 100644 --- a/tests/unit/chat_engine/test_chat_engine.py +++ b/tests/unit/chat_engine/test_chat_engine.py @@ -13,7 +13,6 @@ from canopy.models.api_models import ChatResponse, _Choice, TokenCounts from canopy.models.data_models import MessageBase, Query, Context, Role from .. import random_words -from ...util import TEST_NAMESPACE MOCK_SYSTEM_PROMPT = "This is my mock prompt" MAX_PROMPT_TOKENS = 100 @@ -101,9 +100,6 @@ def _get_inputs_and_expected(self, } return messages, expected - @pytest.mark.parametrize("namespace", [ - None, TEST_NAMESPACE - ]) def test_chat(self, namespace, history_length=5, snippet_length=10): chat_engine = self._init_chat_engine() @@ -136,9 +132,6 @@ def test_chat(self, namespace, history_length=5, snippet_length=10): assert response == expected['response'] - @pytest.mark.parametrize("namespace", [ - None, TEST_NAMESPACE - ]) @pytest.mark.parametrize("allow_model_params_override,params_override", [("False", None), ("False", {'temperature': 0.99, 'top_p': 0.5}), diff --git a/tests/unit/context_engine/test_context_engine.py b/tests/unit/context_engine/test_context_engine.py index beb063ed..1ed2b52b 100644 --- a/tests/unit/context_engine/test_context_engine.py +++ b/tests/unit/context_engine/test_context_engine.py @@ -11,7 +11,6 @@ from canopy.knowledge_base.base import BaseKnowledgeBase from canopy.knowledge_base.models import QueryResult, DocumentWithScore from canopy.models.data_models import Query, Context, ContextContent -from tests.util import TEST_NAMESPACE @pytest.fixture @@ -60,11 +59,6 @@ def mock_query_result(sample_context_text): ] -@pytest.fixture(scope="module", params=[None, TEST_NAMESPACE]) -def namespace(request): - return request.param - - def test_query(context_engine, mock_knowledge_base, mock_context_builder, diff --git a/tests/util.py b/tests/util.py index 7af802fa..f4d8d636 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,42 +1,38 @@ import logging from datetime import datetime +from typing import List -from canopy.knowledge_base.knowledge_base import _get_global_client +from canopy.knowledge_base.knowledge_base import _get_global_client, INDEX_NAME_PREFIX logger = logging.getLogger(__name__) -TEST_NAMESPACE = "ns" -TEST_CREATE_INDEX_PARAMS = [ - {"spec": {"serverless": {"cloud": "aws", "region": "us-west-2"}}}, - # TODO: Enable this - # {"spec": {"pod": {"environment": "eu-west1-gcp", "pod_type": "p1.x1"}}}, -] - def create_index_name(testrun_uid: str, prefix: str) -> str: today = datetime.today().strftime("%Y-%m-%d") - return f"{prefix}-{testrun_uid[-6:]}-{today}" + return f"{testrun_uid[-6:]}-{prefix}-{today}" def create_system_tests_index_name(testrun_uid: str) -> str: return create_index_name(testrun_uid, "test-kb") -def create_e2e_tests_index_name(testrun_uid: str) -> str: - return create_index_name(testrun_uid, "test-app") +def create_e2e_tests_index_name(testrun_uid: str, index_type: str) -> str: + return create_index_name(testrun_uid, f"test-app-{index_type}") + + +def get_related_indexes(indexes: List[str], testrun_uid: str) -> List[str]: + return [ + index for index in indexes + if index.startswith(f"{INDEX_NAME_PREFIX}{testrun_uid[-6:]}") + ] def cleanup_indexes(testrun_uid: str): client = _get_global_client() - e2e_index_name = create_e2e_tests_index_name(testrun_uid) - system_index_name = create_system_tests_index_name(testrun_uid) - index_names = (system_index_name, e2e_index_name) + current_indexes = client.list_indexes().names() + index_names = get_related_indexes(current_indexes, testrun_uid) logger.info(f"Preparing to cleanup indexes: {index_names}") - current_indexes = client.list_indexes() for index_name in index_names: - if index_name in current_indexes: - logger.info(f"Deleting index '{index_name}'...") - client.delete_index(index_name) - logger.info(f"Index '{index_name}' deleted.") - else: - logger.info(f"Index '{index_name}' does not exist.") + logger.info(f"Deleting index '{index_name}'...") + client.delete_index(index_name) + logger.info(f"Index '{index_name}' deleted.")