From cc670063b0119146c67d79349ab7cb79713d1967 Mon Sep 17 00:00:00 2001 From: Kat Petrova Date: Thu, 27 Jun 2024 16:38:57 +0200 Subject: [PATCH] qdrant requirements --- .github/workflows/continous-integration.yml | 5 ++++ README.md | 27 ++++++++++++++------- addons/qdrant.py | 6 ++--- qdrant-requirements.txt | 4 +++ 4 files changed, 30 insertions(+), 12 deletions(-) create mode 100644 qdrant-requirements.txt diff --git a/.github/workflows/continous-integration.yml b/.github/workflows/continous-integration.yml index a6b913f..9f2242b 100644 --- a/.github/workflows/continous-integration.yml +++ b/.github/workflows/continous-integration.yml @@ -48,6 +48,11 @@ jobs: - name: Set up virtual environment run: poetry config virtualenvs.in-project true + - name: Install pip dependencies from requirements file + run: | + source .venv/bin/activate + pip install -r qdrant-requirements.txt + # Authenticate with gcloud for release registry (where Rasa is published) - id: "auth-release" name: Authenticate with gcloud for release registry 🎫 diff --git a/README.md b/README.md index 2730edb..e1c5dd0 100644 --- a/README.md +++ b/README.md @@ -221,6 +221,11 @@ After you cloned the repository and are authenticated, follow the installation s ``` pyenv local 3.10.12 ``` +3. Create and activate virtual environment + ``` + pyenv virtualenv 3.10.12 new_venv + pyenv activate new_venv + ``` 3. Install the dependencies with `poetry` ``` poetry install @@ -232,7 +237,8 @@ After you cloned the repository and are authenticated, follow the installation s OPENAI_API_KEY= RASA_DUCKLING_HTTP_URL= ``` -5. [Optional] Set up the extractive search: + +5. 
Set up the extractive search: - Setup a local docker instance of Qdrant ``` docker pull qdrant/qdrant @@ -240,14 +246,15 @@ After you cloned the repository and are authenticated, follow the installation s -v $(pwd)/qdrant_storage:/qdrant/storage:z \ qdrant/qdrant ``` - - Update the virtual environment - ``` - poetry add datasets cohere qdrant-client sentence-transformers - ``` - - Ingest documents from SQUAD dataset (modify the script if qdrant isn't running locally!) - ``` - python scripts/load-data-to-qdrant.py - ``` + - Upload data to Qdrant + - Install the upload dependencies into the active virtual environment: + ``` + pip install -r qdrant-requirements.txt + ``` + - Ingest documents from SQUAD dataset (modify the script if qdrant isn't running locally!) + ``` + python scripts/load-data-to-qdrant.py + ``` You can toggle parameter `use_generative_llm` in config.yml to change the behavior. The answer is selected from the first search result -> metadata -> `answer` key #### Custom Information Retriever @@ -366,3 +373,5 @@ or ```commandline rasa test e2e e2e/tests/path/to/a/target/test.yml ``` + +## Running the project with enterprise search diff --git a/addons/qdrant.py b/addons/qdrant.py index 416df6a..f85e4a3 100644 --- a/addons/qdrant.py +++ b/addons/qdrant.py @@ -1,4 +1,4 @@ -from typing import Text, Any +from typing import Text, Any, Dict import structlog from langchain.vectorstores.qdrant import Qdrant @@ -39,7 +39,7 @@ def __init__(self, message: str) -> None: def __str__(self) -> str: return self.base_message + self.message + f"{self.__cause__}" -def prepare_search_query(tracker_state: dict[str, Any]) -> str: +def prepare_search_query(tracker_state: Dict[str, Any]) -> str: """Uses Cohere to generate a search query from the chat history. Args: tracker_state: The tracker state. 
@@ -97,7 +97,7 @@ def connect( ) async def search( - self, query: Text, tracker_state: dict[str, Any], threshold: float = 0.0 + self, query: Text, tracker_state: Dict[str, Any], threshold: float = 0.0 ) -> SearchResultList: """Search for a document in the Qdrant vector store. diff --git a/qdrant-requirements.txt b/qdrant-requirements.txt new file mode 100644 index 0000000..1015446 --- /dev/null +++ b/qdrant-requirements.txt @@ -0,0 +1,4 @@ +datasets==2.20.0 +cohere==5.5.8 +qdrant-client==1.9.2 +sentence-transformers==3.0.1