address comments

langchain-ai · Dec 23, 2023 · 1581d2b · 1581d2b
1 parent 8019aa5
commit 1581d2b
Show file tree

Hide file tree

Showing 3 changed files with 73 additions and 33 deletions.
diff --git a/docs/docs/integrations/vectorstores/bigquery_vector_search.ipynb b/docs/docs/integrations/vectorstores/bigquery_vector_search.ipynb
@@ -8,10 +8,14 @@
       "source": [
         "# BigQueryVectorSearch\n",
         "> **BigQueryVectorSearch**:\n",
-        "BigQuery vector search lets you use GoogleSQL to do semantic search, using vector indexes for fast but approximate results, or using brute force for exact results.\n",
+        "BigQuery vector search lets you use GoogleSQL to do semantic\n",
+        "search, using vector indexes for fast but approximate results,\n",
+        "or using brute force for exact results.\n",
         "\n",
         "\n",
-        "This tutorial illustrates how to work with an end-to-end data and embedding management system in LangChain, and provide scalable semantic search in BigQuery."
+        "This tutorial illustrates how to work with an end-to-end data\n",
+        "and embedding management system in LangChain, and provide scalable\n",
+        "semantic search in BigQuery."
       ]
     },
     {
@@ -45,7 +49,24 @@
       },
       "outputs": [],
       "source": [
-        "! pip install google-cloud-aiplatform langchain==0.0.316 google-cloud-bigquery pydantic==1.10.8 typing-inspect==0.8.0 typing_extensions==4.5.0 pandas openai==0.28.1 tiktoken datasets google-api-python-client pypdf faiss-cpu transformers config --upgrade --user"
+        "! pip install \\\n",
+        "    google-cloud-aiplatform \\\n",
+        "    langchain==0.0.316 \\\n",
+        "    google-cloud-bigquery \\\n",
+        "    pydantic==1.10.8 \\\n",
+        "    typing-inspect==0.8.0 \\\n",
+        "    typing_extensions==4.5.0 \\\n",
+        "    pandas \\\n",
+        "    openai==0.28.1 \\\n",
+        "    tiktoken \\\n",
+        "    datasets \\\n",
+        "    google-api-python-client \\\n",
+        "    pypdf \\\n",
+        "    faiss-cpu \\\n",
+        "    transformers \\\n",
+        "    config \\\n",
+        "    --upgrade \\\n",
+        "    --user"
       ]
     },
     {
@@ -54,7 +75,10 @@
         "id": "v40bB_GMcr9f"
       },
       "source": [
-        "**Colab only:** Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top."
+        "**Colab only:** Uncomment the following cell to restart the\n",
+        "kernel or use the button to restart the kernel. For Vertex\n",
+        "AI Workbench you can restart the terminal using the button\n",
+        "on top."
       ]
     },
     {
@@ -65,7 +89,8 @@
       },
       "outputs": [],
       "source": [
-        "# # Automatically restart kernel after installs so that your environment can access the new packages\n",
+        "# # Automatically restart kernel after installs so that your\n",
+        "# # environment can access the new packages\n",
         "# import IPython\n",
         "\n",
         "# app = IPython.Application.instance()\n",
@@ -88,7 +113,8 @@
         "If you don't know your project ID, try the following:\n",
         "* Run `gcloud config list`.\n",
         "* Run `gcloud projects list`.\n",
-        "* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
+        "* See the support page:\n",
+        "[Locate the project ID](https://support.google.com/googleapi/answer/7014113)."
       ]
     },
     {
@@ -109,7 +135,10 @@
       "source": [
         "#### Set the region\n",
         "\n",
-        "You can also change the `REGION` variable used by BigQuery. Learn more about [BigQuery regions](https://cloud.google.com/bigquery/docs/locations#supported_locations)."
+        "You can also change the `REGION` variable used by BigQuery.\n",
+        "Learn more about\n",
+        "[BigQuery regions](https://cloud.google.com/bigquery/docs/locations#supported_locations)."
       ]
     },
     {
@@ -127,8 +156,11 @@
       "source": [
         "### Authenticating your notebook environment\n",
         "\n",
-        "- If you are using **Colab** to run this notebook, uncomment the cell below and continue.\n",
-        "- If you are using **Vertex AI Workbench**, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
+        "- If you are using **Colab** to run this notebook, uncomment the cell\n",
+        "below and continue.\n",
+        "- If you are using **Vertex AI Workbench**, check out the setup\n",
+        "instructions\n",
+        "[here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)."
       ]
     },
     {
@@ -138,6 +170,7 @@
       "outputs": [],
       "source": [
         "from google.colab import auth as google_auth\n",
+        "\n",
         "google_auth.authenticate_user()"
       ]
     },
@@ -219,11 +252,11 @@
         "]\n",
         "\n",
         "store = BigQueryVectorSearch(\n",
-        "            embedding,\n",
-        "            project_id=PROJECT_ID,\n",
-        "            dataset_name=\"<your_dataset>\",\n",
-        "            table_name=\"<your_table>\",\n",
-        "            location=REGION)"
+        "    embedding,\n",
+        "    project_id=PROJECT_ID,\n",
+        "    dataset_name=\"<your_dataset>\",\n",
+        "    table_name=\"<your_table>\",\n",
+        "    location=REGION)"
       ]
     },
     {
@@ -244,16 +277,16 @@
         "DEFAULT_DISTANCE_STRATEGY = DistanceStrategy.EUCLIDEAN_DISTANCE\n",
         "\n",
         "bq_vector_search = BigQueryVectorSearch(\n",
-        "                                project_id=PROJECT_ID,\n",
-        "                                dataset_name=\"your_dataset\",\n",
-        "                                table_name=\"<your_table>\",\n",
-        "                                # Column {content_field} must be of STRING type\n",
-        "                                content_field=\"<your_content>\",\n",
-        "                                # Column {text_embedding_field} must be of ARRAY<FLOAT64> type\n",
-        "                                text_embedding_field=\"<your_embedding>\",\n",
-        "                                embedding=embedding,\n",
-        "                                distance_strategy=DEFAULT_DISTANCE_STRATEGY,\n",
-        "                                location=REGION)"
+        "    project_id=PROJECT_ID,\n",
+        "    dataset_name=\"your_dataset\",\n",
+        "    table_name=\"<your_table>\",\n",
+        "    # Column {content_field} must be of STRING type\n",
+        "    content_field=\"<your_content>\",\n",
+        "    # Column {text_embedding_field} must be of ARRAY<FLOAT64> type\n",
+        "    text_embedding_field=\"<your_embedding>\",\n",
+        "    embedding=embedding,\n",
+        "    distance_strategy=DEFAULT_DISTANCE_STRATEGY,\n",
+        "    location=REGION)"
       ]
     },
     {
@@ -311,7 +344,7 @@
       "outputs": [],
       "source": [
         "query_vector = embedding.embed_query(query)\n",
-        "docs = store.similarity_search_by_vector(query_vector, k=2)"
+        "docs = store.similarity_search_by_vector(query_vector,k=2)"
       ]
     },
     {
@@ -327,7 +360,10 @@
       "metadata": {},
       "outputs": [],
       "source": [
-        "docs = store.similarity_search_by_vector(query_vector, filter={\"float_t\": 1.23})"
+        "docs = store.similarity_search_by_vector(\n",
+        "    query_vector,\n",
+        "    filter={\"float_t\": 1.23}\n",
+        ")"
       ]
     }
   ],

diff --git a/libs/community/langchain_community/vectorstores/bigquery_vector_search.py b/libs/community/langchain_community/vectorstores/bigquery_vector_search.py
@@ -12,10 +12,10 @@
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type
 
 import numpy as np
-from langchain.docstore.document import Document
-from langchain.schema.embeddings import Embeddings
-from langchain.schema.vectorstore import VectorStore
 from langchain.vectorstores.utils import maximal_marginal_relevance
+from langchain_core.documents import Document
+from langchain_core.embeddings import Embeddings
+from langchain_core.vectorstores import VectorStore
 
 from langchain_community.vectorstores.utils import DistanceStrategy
 

diff --git a/libs/community/tests/integration_tests/vectorstores/test_bigquery_vector_search.py b/libs/community/tests/integration_tests/vectorstores/test_bigquery_vector_search.py
@@ -8,16 +8,20 @@
 import uuid
 
 import pytest
-from google.cloud import bigquery
 
 from langchain_community.vectorstores.bigquery_vector_search import BigQueryVectorSearch
+from tests.integration_tests.vectorstores.fake_embeddings import FakeEmbeddings
 
+TEST_TABLE_NAME = "langchain_test_table"
 
 @pytest.fixture(scope="class")
 def store(request: pytest.FixtureRequest) -> BigQueryVectorSearch:
     """BigQueryVectorStore tests context."""
+    from google.cloud import bigquery
     TestBigQueryVectorStore.store = BigQueryVectorSearch(
-        dataset_name=TestBigQueryVectorStore.dataset_name
+        embedding=FakeEmbeddings,
+        dataset_name=TestBigQueryVectorStore.dataset_name,
+        table_name=TEST_TABLE_NAME
     )
     TestBigQueryVectorStore.store.add_texts(
         TestBigQueryVectorStore.texts, TestBigQueryVectorStore.metadatas
@@ -70,7 +74,7 @@ def test_semantic_search(self, store: BigQueryVectorSearch) -> None:
     def test_semantic_search_filter_fruits(self, store: BigQueryVectorSearch) -> None:
         """Test on semantic similarity with metadata filter."""
         docs = store.similarity_search(
-            "food", metadata_filter="JSON_VALUE(metadata,'$.kind') = 'fruit'"
+            "food", filter={"kind":"fruit"}
         )
         kinds = [d.metadata["kind"] for d in docs]
         assert "fruit" in kinds
@@ -80,7 +84,7 @@ def test_semantic_search_filter_fruits(self, store: BigQueryVectorSearch) -> Non
     def test_get_doc_by_filter(self, store: BigQueryVectorSearch) -> None:
         """Test on document retrieval with metadata filter."""
         docs = store.get_documents(
-            metadata_filter="JSON_VALUE(metadata,'$.kind') = 'fruit'"
+            filter={"kind":"fruit"}
         )
         kinds = [d.metadata["kind"] for d in docs]
         assert "fruit" in kinds