Skip to content
This repository has been archived by the owner on Nov 13, 2024. It is now read-only.

Commit

Permalink
Cleanup indexes in case of failure (#232)
Browse files Browse the repository at this point in the history
* Cleanup indexes in case of failure

* Remove shell bash

* Move to script

* Fix

* Fix env

* Poetry run

* Fix

* Try fix

* Try

* Fix

* Update secrets

* Test

* Fix

* Fail system test

* Improve logging

* Fix lint
  • Loading branch information
izellevy authored Dec 20, 2023
1 parent d0bcd22 commit 29fb994
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 23 deletions.
26 changes: 23 additions & 3 deletions .github/workflows/PR.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ jobs:
matrix:
python-version: [3.9, '3.10', 3.11]
pinecone-plan: ["paid", "starter"]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
Expand All @@ -66,6 +65,7 @@ jobs:
echo "${SUFFIX}"
echo "INDEX_NAME_SUFFIX=${SUFFIX}" >> $GITHUB_OUTPUT
- name: Run system tests
id: system_tests
if: github.event_name == 'merge_group'
env:
INDEX_NAME: system-${{ steps.gen_suffix.outputs.INDEX_NAME_SUFFIX }}
Expand All @@ -74,17 +74,37 @@ jobs:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANYSCALE_API_KEY: ${{ secrets.ANYSCALE_API_KEY }}
CO_API_KEY: ${{ secrets.CO_API_KEY }}
run: poetry run pytest -n 3 --dist loadscope --html=report_system.html --self-contained-html tests/system
run: |
  # Generate a lowercase, dash-free run id and expose it as a step output so
  # the later "Cleanup indexes" step can find the indexes of this run.
  run_id=$(uuidgen | tr -d '-' | tr '[:upper:]' '[:lower:]')
  echo "run_id=${run_id}" >> "$GITHUB_OUTPUT"
  echo "Test Run ID: ${run_id}"
  # The step status must come from pytest itself: no unconditional `exit 1`,
  # which would fail the job (and skip the e2e tests) even on a green run.
  poetry run pytest -n 3 --dist loadscope --testrunuid "$run_id" --html=report_system.html --self-contained-html tests/system
- name: Run e2e tests
id: e2e_tests
if: github.event_name == 'merge_group'
env:
INDEX_NAME: e2e-${{ steps.gen_suffix.outputs.INDEX_NAME_SUFFIX }}
PINECONE_ENVIRONMENT: ${{ matrix.pinecone-plan == 'paid' && secrets.PINECONE_ENVIRONMENT_3 || secrets.PINECONE_ENVIRONMENT_4 }}
PINECONE_API_KEY: ${{ matrix.pinecone-plan == 'paid' && secrets.PINECONE_API_KEY_3 || secrets.PINECONE_API_KEY_4 }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
ANYSCALE_API_KEY: ${{ secrets.ANYSCALE_API_KEY }}
CO_API_KEY: ${{ secrets.CO_API_KEY }}
CE_LOG_FILENAME: e2e.log
run: poetry run pytest -n 3 --dist loadscope --html=report_e2e.html --self-contained-html tests/e2e
run: |
run_id=$(uuidgen | tr -d '-' | tr '[:upper:]' '[:lower:]')
echo "run_id=${run_id}" >> $GITHUB_OUTPUT
echo "Test Run ID: ${run_id}"
poetry run pytest -n 3 --dist loadscope --testrunuid $run_id --html=report_e2e.html --self-contained-html tests/e2e
- name: Cleanup indexes
if: (cancelled() || failure()) && github.event_name == 'merge_group'
env:
PINECONE_ENVIRONMENT: ${{ matrix.pinecone-plan == 'paid' && secrets.PINECONE_ENVIRONMENT_3 || secrets.PINECONE_ENVIRONMENT_4 }}
PINECONE_API_KEY: ${{ matrix.pinecone-plan == 'paid' && secrets.PINECONE_API_KEY_3 || secrets.PINECONE_API_KEY_4 }}
run: |
export PYTHONPATH=.
poetry run python scripts/cleanup_indexes.py "${{ steps.e2e_tests.outputs.run_id }}"
poetry run python scripts/cleanup_indexes.py "${{ steps.system_tests.outputs.run_id }}"
- name: upload pytest report.html
uses: actions/upload-artifact@v3
if: always()
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,5 @@ skip-checking-raises = true
[tool.poetry.scripts]
canopy = "canopy_cli.cli:cli"

[tool.pytest.ini_options]
log_cli = true
23 changes: 23 additions & 0 deletions scripts/cleanup_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import logging
import sys
from tests.util import cleanup_indexes


def main():
    """Command-line entry point: delete the indexes left behind by a test run.

    Expects exactly one positional argument, the test run UID. When the
    argument count is wrong, a usage message is logged and the process exits
    with status 1. When the UID is empty or only whitespace (for example when
    the CI step that should have produced it never ran), nothing is deleted.
    """
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    if len(sys.argv) != 2:
        # A malformed invocation is an error, so report it at ERROR level
        # instead of INFO before exiting with a non-zero status.
        logger.error("Usage: python scripts/cleanup_indexes.py <testrun_uid>")
        sys.exit(1)

    # Treat a whitespace-only argument like an empty one.
    testrun_uid = sys.argv[1].strip()
    if not testrun_uid:
        logger.info("testrun_uid is not passed, index cleanup will not be run.")
        return

    logger.info(f"Cleaning up indexes for testrun_uid '{testrun_uid}'")
    cleanup_indexes(testrun_uid)


if __name__ == '__main__':
main()
15 changes: 6 additions & 9 deletions tests/e2e/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,19 @@
import os
from typing import List

from datetime import datetime

import pinecone
import pytest
from fastapi.testclient import TestClient
from tenacity import retry, stop_after_attempt, wait_fixed

from canopy.knowledge_base import KnowledgeBase

from canopy_server.app import app, API_VERSION
from canopy_server.models.v1.api_models import (
HealthStatus,
ContextUpsertRequest,
ContextQueryRequest)
from .. import Tokenizer
from ..util import create_e2e_tests_index_name

upsert_payload = ContextUpsertRequest(
documents=[
Expand Down Expand Up @@ -45,17 +43,15 @@ def assert_vector_ids_not_exist(vector_ids: List[str],


@pytest.fixture(scope="module")
def index_name(testrun_uid):
today = datetime.today().strftime("%Y-%m-%d")
return f"test-app-{testrun_uid[-6:]}-{today}"
def index_name(testrun_uid: str):
return create_e2e_tests_index_name(testrun_uid)


@pytest.fixture(scope="module", autouse=True)
def knowledge_base(index_name):
    """Create the Canopy index for this test module and return its KnowledgeBase.

    Module-scoped and auto-used: it initializes the Pinecone client, builds a
    ``KnowledgeBase`` for ``index_name`` and creates its index with ``"test"``
    as an indexed field.
    """
    pinecone.init()
    canopy_kb = KnowledgeBase(index_name=index_name)
    canopy_kb.create_canopy_index(indexed_fields=["test"])
    return canopy_kb


Expand Down Expand Up @@ -84,6 +80,7 @@ def teardown_knowledge_base(knowledge_base):
if index_name in pinecone.list_indexes():
pinecone.delete_index(index_name)


# TODO: the following test is a complete e2e test, this is not the final design
# for the e2e tests, however there were some issues
# with the fixtures that will be resolved
Expand All @@ -93,8 +90,8 @@ def test_health(client):
health_response = client.get("/health")
assert health_response.is_success
assert (
health_response.json()
== HealthStatus(pinecone_status="OK", llm_status="OK").dict()
health_response.json()
== HealthStatus(pinecone_status="OK", llm_status="OK").dict()
)


Expand Down
20 changes: 9 additions & 11 deletions tests/system/knowledge_base/test_knowledge_base.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
import os
import random

import pytest
import pinecone
import numpy as np
import pinecone
import pytest
from dotenv import load_dotenv
from tenacity import (
retry,
stop_after_delay,
wait_fixed,
wait_chain,
)
from dotenv import load_dotenv
from datetime import datetime

from canopy.knowledge_base import KnowledgeBase, list_canopy_indexes
from canopy.knowledge_base.chunker import Chunker
from canopy.knowledge_base.knowledge_base import INDEX_NAME_PREFIX
from canopy.knowledge_base.models import DocumentWithScore
from canopy.knowledge_base.record_encoder import RecordEncoder
from canopy.knowledge_base.reranker import Reranker
from canopy.models.data_models import Document, Query
from tests.unit.stubs.stub_record_encoder import StubRecordEncoder
from tests.unit.stubs.stub_dense_encoder import StubDenseEncoder
from tests.unit.stubs.stub_chunker import StubChunker
from tests.unit import random_words

from tests.unit.stubs.stub_chunker import StubChunker
from tests.unit.stubs.stub_dense_encoder import StubDenseEncoder
from tests.unit.stubs.stub_record_encoder import StubRecordEncoder
from tests.util import create_system_tests_index_name

load_dotenv()

Expand All @@ -42,8 +42,7 @@ def retry_decorator():

@pytest.fixture(scope="module")
def index_name(testrun_uid):
today = datetime.today().strftime("%Y-%m-%d")
return f"test-kb-{testrun_uid[-6:]}-{today}"
return create_system_tests_index_name(testrun_uid)


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -299,7 +298,6 @@ def test_update_documents(encoder,
documents,
encoded_chunks,
knowledge_base):

index_name = knowledge_base._index_name

# chunker/kb that produces fewer chunks per doc
Expand Down
35 changes: 35 additions & 0 deletions tests/util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import logging
from datetime import datetime

import pinecone

logger = logging.getLogger(__name__)


def create_index_name(testrun_uid: str, prefix: str) -> str:
    """Build a test index name of the form ``<prefix>-<uid tail>-<YYYY-MM-DD>``.

    Args:
        testrun_uid: Unique id of the test run; only its last six characters
            are used.
        prefix: Leading part of the name, identifying the test suite.

    Returns:
        The index name, stamped with the local date at the time of the call.
    """
    uid_tail = testrun_uid[-6:]
    date_stamp = datetime.today().strftime("%Y-%m-%d")
    return "-".join((prefix, uid_tail, date_stamp))


def create_system_tests_index_name(testrun_uid: str) -> str:
    """Return the index name used by the system tests of run ``testrun_uid``."""
    return create_index_name(testrun_uid=testrun_uid, prefix="test-kb")


def create_e2e_tests_index_name(testrun_uid: str) -> str:
    """Return the index name used by the e2e tests of run ``testrun_uid``."""
    return create_index_name(testrun_uid=testrun_uid, prefix="test-app")


def cleanup_indexes(testrun_uid: str):
    """Delete the Pinecone indexes that belong to the test run ``testrun_uid``.

    Index names embed the date on which they were generated, so rebuilding the
    exact name at cleanup time misses indexes created before midnight by a run
    that is cleaned up after midnight. Indexes are therefore matched on the
    date-independent part of the name (``<prefix>-<uid tail>-``) instead.

    Every matching index is attempted even if an earlier deletion fails.

    Args:
        testrun_uid: Unique id of the test run whose indexes should be removed.
            An empty value is ignored, because it would match indexes of
            unrelated runs.

    Raises:
        RuntimeError: If one or more matching indexes could not be deleted.
    """
    if not testrun_uid:
        logger.warning("Empty testrun_uid, index cleanup will not be run.")
        return

    pinecone.init()

    # The generated names end with a fixed-width "YYYY-MM-DD" stamp; dropping
    # it leaves "<prefix>-<uid tail>-", which does not depend on today's date.
    date_suffix_len = len("YYYY-MM-DD")
    name_stems = tuple(
        full_name[:-date_suffix_len]
        for full_name in (
            create_system_tests_index_name(testrun_uid),
            create_e2e_tests_index_name(testrun_uid),
        )
    )
    logger.info(f"Preparing to cleanup indexes matching: {name_stems}")

    current_indexes = pinecone.list_indexes()
    # Substring (not exact) match, so a name carrying an extra leading prefix
    # is still found.
    # NOTE(review): the system tests import INDEX_NAME_PREFIX from canopy,
    # which suggests KnowledgeBase prefixes index names - confirm.
    matching = [
        index_name
        for index_name in current_indexes
        if any(stem in index_name for stem in name_stems)
    ]
    if not matching:
        logger.info(f"No indexes found for testrun_uid '{testrun_uid}'.")
        return

    failed = []
    for index_name in matching:
        logger.info(f"Deleting index '{index_name}'...")
        try:
            pinecone.delete_index(index_name)
        except Exception:
            # Keep going: one failed deletion must not leak the other indexes.
            logger.exception(f"Failed to delete index '{index_name}'.")
            failed.append(index_name)
        else:
            logger.info(f"Index '{index_name}' deleted.")

    if failed:
        raise RuntimeError(f"Failed to delete indexes: {failed}")

0 comments on commit 29fb994

Please sign in to comment.