Merge pull request #20 from prrao87/qdrant

Qdrant refactor
prrao87 · Apr 24, 2023 · 34d988a · 34d988a
2 parents dc1607d + 4a30334
commit 34d988a
Show file tree

Hide file tree

Showing 20 changed files with 387 additions and 199 deletions.
diff --git a/.gitignore b/.gitignore
@@ -134,5 +134,5 @@ dmypy.json
 # data
 data/*.json
 data/*.jsonl
-*/*/meili_data
-dbs/qdrant/scripts/onnx_models
+dbs/meilisearch/meili_data
+dbs/qdrant/onnx_model/onnx
diff --git a/dbs/elasticsearch/README.md b/dbs/elasticsearch/README.md
@@ -44,7 +44,7 @@ docker compose down
 **Note:** The setup shown here would not be ideal in production, as there are other details related to security and scalability that are not addressed by a simple Docker setup, but this is a good starting point to begin experimenting!
 
 
-## Step 1: Ingest the data
+## Step 2: Ingest the data
 
 In this step, we ingest the wine reviews dataset into Elasticsearch. Data is asynchronously ingested into the Elasticsearch database through the scripts in the `scripts` directory.
 

diff --git a/dbs/meilisearch/README.md b/dbs/meilisearch/README.md
@@ -45,7 +45,7 @@ docker compose down
 **Note:** The setup shown here would not be ideal in production, as there are other details related to security and scalability that are not addressed by a simple Docker setup, but this is a good starting point to begin experimenting!
 
 
-## Step 1: Ingest the data
+## Step 2: Ingest the data
 
 In this step, we ingest the wine reviews dataset into Meilisearch. Data is asynchronously ingested into the Meilisearch database through the scripts in the `scripts` directory.
 

diff --git a/dbs/neo4j/README.md b/dbs/neo4j/README.md
@@ -47,7 +47,7 @@ docker compose down
 **Note:** The setup shown here would not be ideal in production, as there are other details related to security and scalability that are not addressed by a simple Docker setup, but this is a good starting point to begin experimenting!
 
 
-## Step 1: Ingest the data
+## Step 2: Ingest the data
 
 In this step, we ingest the wine reviews dataset into Neo4j. To do this, we first conceptualize the following data model:
 

diff --git a/dbs/qdrant/.env.example b/dbs/qdrant/.env.example
@@ -4,6 +4,7 @@ QDRANT_HOST = "localhost"
 QDRANT_SERVICE = "qdrant"
 API_PORT = 8005
 EMBEDDING_MODEL_CHECKPOINT = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
+ONNX_MODEL_FILENAME = "model_optimized_quantized.onnx"
 
 # Container image tag
 TAG = "0.1.0"

diff --git a/dbs/qdrant/Dockerfile b/dbs/qdrant/Dockerfile
@@ -2,13 +2,12 @@ FROM python:3.10-slim-bullseye
 
 WORKDIR /wine
 
-COPY ./requirements-docker.txt /wine/requirements-docker.txt
+COPY ./requirements.txt /wine/requirements.txt
 
 RUN pip install --no-cache-dir -U pip wheel setuptools
-RUN pip install --no-cache-dir -r /wine/requirements-docker.txt
+RUN pip install --no-cache-dir -r /wine/requirements.txt
 
 COPY ./api /wine/api
 COPY ./schemas /wine/schemas
-COPY ./scripts/onnx_models /wine/scripts/onnx_models
 
 EXPOSE 8000
diff --git a/dbs/qdrant/Dockerfile.onnxruntime b/dbs/qdrant/Dockerfile.onnxruntime
@@ -0,0 +1,14 @@
+FROM python:3.10-slim-bullseye
+
+WORKDIR /wine
+
+COPY ./requirements-onnx.txt /wine/requirements-onnx.txt
+
+RUN pip install --no-cache-dir -U pip wheel setuptools
+RUN pip install --no-cache-dir -r /wine/requirements-onnx.txt
+
+COPY ./api /wine/api
+COPY ./schemas /wine/schemas
+COPY ./onnx_model /wine/onnx_model
+
+EXPOSE 8000
diff --git a/dbs/qdrant/README.md b/dbs/qdrant/README.md
diff --git a/dbs/qdrant/api/config.py b/dbs/qdrant/api/config.py
@@ -8,6 +8,7 @@ class Settings(BaseSettings):
     qdrant_service: str
     api_port = str
     embedding_model_checkpoint: str
+    onnx_model_filename: str
     tag: str
 
     class Config:

diff --git a/dbs/qdrant/api/main.py b/dbs/qdrant/api/main.py
@@ -8,7 +8,16 @@
 from api.config import Settings
 from api.routers.wine import wine_router
 
-from scripts.onnx_optimizer import get_embedding_pipeline
+try:
+    from optimum.onnxruntime import ORTModelForCustomTasks
+    from optimum.pipelines import pipeline
+    from transformers import AutoTokenizer
+
+    model_type = "onnx"
+except ModuleNotFoundError:
+    from sentence_transformers import SentenceTransformer
+
+    model_type = "sbert"
 
 
 @lru_cache()
@@ -17,14 +26,31 @@ def get_settings():
     return Settings()
 
 
+def get_embedding_pipeline(onnx_path, model_filename: str):
+    """
+    Create a sentence embedding pipeline using the optimized ONNX model, if available in the environment
+    """
+    # Reload tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(onnx_path)
+    optimized_model = ORTModelForCustomTasks.from_pretrained(onnx_path, file_name=model_filename)
+    embedding_pipeline = pipeline("feature-extraction", model=optimized_model, tokenizer=tokenizer)
+    return embedding_pipeline
+
+
 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
     """Async context manager for Qdrant database connection."""
     settings = get_settings()
     model_checkpoint = settings.embedding_model_checkpoint
-    app.model = get_embedding_pipeline(
-        "scripts/onnx_models", model_filename="model_optimized_quantized.onnx"
-    )
+    if model_type == "sbert":
+        app.model = SentenceTransformer(model_checkpoint)
+        app.model_type = "sbert"
+    elif model_type == "onnx":
+        app.model = get_embedding_pipeline(
+            "onnx_model/onnx", model_filename=settings.onnx_model_filename
+        )
+        app.model_type = "onnx"
+    # Define Qdrant client
     app.client = QdrantClient(host=settings.qdrant_service, port=settings.qdrant_port)
     print("Successfully connected to Qdrant")
     yield

diff --git a/dbs/qdrant/api/routers/wine.py b/dbs/qdrant/api/routers/wine.py
@@ -1,7 +1,6 @@
+from fastapi import APIRouter, HTTPException, Query, Request
 from qdrant_client import QdrantClient
 from qdrant_client.http import models
-from fastapi import APIRouter, HTTPException, Query, Request
-from optimum.pipelines import pipeline
 
 from schemas.retriever import SimilaritySearch
 
@@ -16,17 +15,18 @@
     response_model=list[SimilaritySearch],
     response_description="Search wines by title, description and variety",
 )
-def search_by_keywords(
+def search_by_similarity(
     request: Request,
     terms: str = Query(description="Search wine by keywords in title, description and variety"),
     max_price: float = Query(
-        default=10000.0, description="Specify the maximum price for the wine (e.g., 30)"
+        default=100.0, description="Specify the maximum price for the wine (e.g., 30)"
+    ),
+    country: str = Query(
+        default=None, description="Specify the country of origin for the wine (e.g., Italy)"
     ),
 ) -> list[SimilaritySearch] | None:
-    model = request.app.model
-    client = request.app.client
     collection = "wines"
-    result = _search_by_keywords(client, model, collection, terms, max_price)
+    result = _search_by_similarity(request, collection, terms, max_price, country)
     if not result:
         raise HTTPException(
             status_code=404,
@@ -38,11 +38,14 @@ def search_by_keywords(
 # --- Helper functions ---
 
 
-def _search_by_keywords(
-    client: QdrantClient, model: pipeline, collection: str, terms: str, max_price: float
+def _search_by_similarity(
+    request: Request, collection: str, terms: str, max_price: float, country: str
 ) -> list[SimilaritySearch] | None:
     """Convert input text query into a vector for lookup in the db"""
-    vector = model(terms)[0][0]
+    if request.app.model_type == "sbert":
+        vector = request.app.model.encode(terms, show_progress_bar=False, batch_size=128).tolist()
+    elif request.app.model_type == "onnx":
+        vector = request.app.model(terms)[0][0]
 
     # Define a range filter for wine price
     filter = models.Filter(
@@ -53,13 +56,19 @@ def _search_by_keywords(
                     "range": {
                         "lte": max_price,
                     },
-                }
+                },
+                {
+                    "key": "country",
+                    "match": {
+                        "value": country,
+                    },
+                },
             ]
         }
     )
 
     # Use `vector` for similarity search on the closest vectors in the collection
-    search_result = client.search(
+    search_result = request.app.client.search(
         collection_name=collection, query_vector=vector, query_filter=filter, top=5
     )
     # `search_result` contains found vector ids with similarity scores along with the stored payload

diff --git a/dbs/qdrant/docker-compose-onnx.yml b/dbs/qdrant/docker-compose-onnx.yml
@@ -0,0 +1,39 @@
+version: "3"
+
+services:
+  qdrant:
+    image: qdrant/qdrant:${QDRANT_VERSION}
+    restart: unless-stopped
+    environment:
+      - QDRANT_HOST=${QDRANT_HOST}
+    ports:
+      - ${QDRANT_PORT}:6333
+    volumes:
+      - qdrant_storage:/qdrant/storage
+    networks:
+      - wine
+
+  fastapi:
+    image: qdrant_wine_fastapi:${TAG}
+    build:
+      context: .
+      dockerfile: Dockerfile.onnxruntime
+    restart: unless-stopped
+    env_file:
+      - .env
+    ports:
+      - ${API_PORT}:8000
+    depends_on:
+      - qdrant
+    volumes:
+      - ./:/wine
+    networks:
+      - wine
+    command: uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload
+
+volumes:
+  qdrant_storage:
+
+networks:
+  wine:
+    driver: bridge
diff --git a/dbs/qdrant/docker-compose.yml b/dbs/qdrant/docker-compose.yml
@@ -10,28 +10,30 @@ services:
       - ${QDRANT_PORT}:6333
     volumes:
       - qdrant_storage:/qdrant/storage
-  #   networks:
-  #     - wine
+    networks:
+      - wine
 
-  # fastapi:
-  #   image: qdrant_wine_fastapi:${TAG}
-  #   build: .
-  #   restart: unless-stopped
-  #   env_file:
-  #     - .env
-  #   ports:
-  #     - ${API_PORT}:8000
-  #   depends_on:
-  #     - qdrant
-  #   volumes:
-  #     - ./:/wine
-  #   networks:
-  #     - wine
-  #   command: uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload
+  fastapi:
+    image: qdrant_wine_fastapi:${TAG}
+    build:
+      context: .
+      dockerfile: Dockerfile
+    restart: unless-stopped
+    env_file:
+      - .env
+    ports:
+      - ${API_PORT}:8000
+    depends_on:
+      - qdrant
+    volumes:
+      - ./:/wine
+    networks:
+      - wine
+    command: uvicorn api.main:app --host 0.0.0.0 --port 8000 --reload
 
 volumes:
   qdrant_storage:
 
-# networks:
-#   wine:
-#     driver: bridge
+networks:
+  wine:
+    driver: bridge