From 8ea7f2f93b329e9f7d4dcb5471acd8018e6868a2 Mon Sep 17 00:00:00 2001 From: anindyam1969 Date: Tue, 5 Nov 2024 19:41:21 +0530 Subject: [PATCH] Handled empty search result handling and updated the notebook --- .../integrations/vectorstores/kinetica.ipynb | 390 ++++++++---------- .../vectorstores/kinetica.py | 44 +- 2 files changed, 186 insertions(+), 248 deletions(-) diff --git a/docs/docs/integrations/vectorstores/kinetica.ipynb b/docs/docs/integrations/vectorstores/kinetica.ipynb index 1d5344cf4201a..02ec8095657b9 100644 --- a/docs/docs/integrations/vectorstores/kinetica.ipynb +++ b/docs/docs/integrations/vectorstores/kinetica.ipynb @@ -33,35 +33,13 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: gpudb==7.2.0.0b in /home/anindyam/kinetica/kinetica-github/langchain/libs/langchain/.venv/lib/python3.8/site-packages (7.2.0.0b0)\n", - "Requirement already satisfied: future in /home/anindyam/kinetica/kinetica-github/langchain/libs/langchain/.venv/lib/python3.8/site-packages (from gpudb==7.2.0.0b) (0.18.3)\n", - "Requirement already satisfied: pyzmq in /home/anindyam/kinetica/kinetica-github/langchain/libs/langchain/.venv/lib/python3.8/site-packages (from gpudb==7.2.0.0b) (25.1.2)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "# Pip install necessary package\n", "%pip install --upgrade --quiet langchain-openai langchain-community\n", - "%pip install gpudb==7.2.0.9\n", + "%pip install gpudb\n", "%pip install --upgrade --quiet tiktoken" ] }, @@ -74,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -87,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -96,7 +74,7 @@ "False" ] }, - "execution_count": 25, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -110,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -120,36 +98,30 @@ " Kinetica,\n", " KineticaSettings,\n", ")\n", - "from langchain_core.documents import Document\n", - "from langchain_openai import OpenAIEmbeddings\n", - "from langchain_text_splitters import CharacterTextSplitter" + "from langchain_openai import OpenAIEmbeddings\n" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "loader = TextLoader(\"../../how_to/state_of_the_union.txt\")\n", - "documents = loader.load()\n", - "text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n", - "docs = text_splitter.split_documents(documents)\n", "\n", - "embeddings = OpenAIEmbeddings()" + "embeddings = OpenAIEmbeddings(model=\"text-embedding-3-large\")" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Kinetica needs the connection to the database.\n", "# This is how to set it up.\n", "HOST = os.getenv(\"KINETICA_HOST\", \"http://127.0.0.1:9191\")\n", - "USERNAME = os.getenv(\"KINETICA_USERNAME\", \"\")\n", - "PASSWORD = os.getenv(\"KINETICA_PASSWORD\", \"\")\n", + "USERNAME = os.getenv(\"KINETICA_USERNAME\", \"admin\")\n", + "PASSWORD = os.getenv(\"KINETICA_PASSWORD\", \"Kinetica1!\")\n", "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\", \"\")\n", "\n", "\n", @@ -157,216 +129,176 @@ " return KineticaSettings(host=HOST, username=USERNAME, password=PASSWORD)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Similarity Search with Euclidean Distance (Default)" - ] - }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "# The Kinetica Module will try to create a table with the name of the collection.\n", - "# So, make sure that the collection name is unique and the user has the permission to create a table.\n", + "from uuid import uuid4\n", "\n", - "COLLECTION_NAME = \"state_of_the_union_test\"\n", - "connection = create_config()\n", + "from langchain_core.documents import Document\n", "\n", - "db = Kinetica.from_documents(\n", - " embedding=embeddings,\n", - " documents=docs,\n", - " collection_name=COLLECTION_NAME,\n", - " config=connection,\n", - ")" + "document_1 = Document(\n", + " page_content=\"I had chocalate chip pancakes and scrambled eggs for breakfast this morning.\",\n", + " metadata={\"source\": \"tweet\"},\n", + ")\n", + "\n", + "document_2 = Document(\n", + " page_content=\"The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\",\n", + " metadata={\"source\": \"news\"},\n", + ")\n", + "\n", + "document_3 = Document(\n", + " page_content=\"Building an exciting new project with LangChain - come check it out!\",\n", + " metadata={\"source\": \"tweet\"},\n", + ")\n", + "\n", + "document_4 = Document(\n", + " page_content=\"Robbers broke into the city bank and stole $1 million in cash.\",\n", + " metadata={\"source\": \"news\"},\n", + ")\n", + "\n", + "document_5 = Document(\n", + " page_content=\"Wow! That was an amazing movie. I can't wait to see it again.\",\n", + " metadata={\"source\": \"tweet\"},\n", + ")\n", + "\n", + "document_6 = Document(\n", + " page_content=\"Is the new iPhone worth the price? Read this review to find out.\",\n", + " metadata={\"source\": \"website\"},\n", + ")\n", + "\n", + "document_7 = Document(\n", + " page_content=\"The top 10 soccer players in the world right now.\",\n", + " metadata={\"source\": \"website\"},\n", + ")\n", + "\n", + "document_8 = Document(\n", + " page_content=\"LangGraph is the best framework for building stateful, agentic applications!\",\n", + " metadata={\"source\": \"tweet\"},\n", + ")\n", + "\n", + "document_9 = Document(\n", + " page_content=\"The stock market is down 500 points today due to fears of a recession.\",\n", + " metadata={\"source\": \"news\"},\n", + ")\n", + "\n", + "document_10 = Document(\n", + " page_content=\"I have a bad feeling I am going to get deleted :(\",\n", + " metadata={\"source\": \"tweet\"},\n", + ")\n", + "\n", + "documents = [\n", + " document_1,\n", + " document_2,\n", + " document_3,\n", + " document_4,\n", + " document_5,\n", + " document_6,\n", + " document_7,\n", + " document_8,\n", + " document_9,\n", + " document_10,\n", + "]\n", + "uuids = [str(uuid4()) for _ in range(len(documents))]" ] }, { - "cell_type": "code", - "execution_count": 30, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "query = \"What did the president say about Ketanji Brown Jackson\"\n", - "docs_with_score = db.similarity_search_with_score(query)" + "## Similarity Search with Euclidean Distance (Default)" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 8, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "--------------------------------------------------------------------------------\n", - "Score: 0.6077010035514832\n", - "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "--------------------------------------------------------------------------------\n", - "--------------------------------------------------------------------------------\n", - "Score: 0.6077010035514832\n", - "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "--------------------------------------------------------------------------------\n", - "--------------------------------------------------------------------------------\n", - "Score: 0.6596046090126038\n", - "A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", - "\n", - "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n", - "\n", - "We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n", - "\n", - "We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n", - "\n", - "We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n", - "\n", - "We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n", - "--------------------------------------------------------------------------------\n", - "--------------------------------------------------------------------------------\n", - "Score: 0.6597143411636353\n", - "A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \n", - "\n", - "And if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \n", - "\n", - "We can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \n", - "\n", - "We’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \n", - "\n", - "We’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \n", - "\n", - "We’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.\n", - "--------------------------------------------------------------------------------\n" - ] + "data": { + "text/plain": [ + "['05e5a484-0273-49d1-90eb-1276baca31de',\n", + " 'd98b808f-dc0b-4328-bdbf-88f6b2ab6040',\n", + " 'ba0968d4-e344-4285-ae0f-f5199b56f9d6',\n", + " 'a25393b8-6539-45b5-993e-ea16d01941ec',\n", + " '804a37e3-1278-4b60-8b02-36b159ee8c1a',\n", + " '9688b594-3dc6-41d2-a937-babf8ff24c2f',\n", + " '40f7b8fe-67c7-489a-a5a5-7d3965e33bba',\n", + " 'b4fc1376-c113-41e9-8f16-f9320517bedd',\n", + " '4d94d089-fdde-442b-84ab-36d9fe0670c8',\n", + " '66fdb79d-49ce-4b06-901a-fda6271baf2a']" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "for doc, score in docs_with_score:\n", - " print(\"-\" * 80)\n", - " print(\"Score: \", score)\n", - " print(doc.page_content)\n", - " print(\"-\" * 80)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Maximal Marginal Relevance Search (MMR)\n", - "Maximal marginal relevance optimizes for similarity to query AND diversity among selected documents." + "# The Kinetica Module will try to create a table with the name of the collection.\n", + "# So, make sure that the collection name is unique and the user has the permission to create a table.\n", + "\n", + "COLLECTION_NAME = \"langchain_example\"\n", + "connection = create_config()\n", + "\n", + "db = Kinetica(\n", + " connection,\n", + " embeddings,\n", + " collection_name=COLLECTION_NAME,\n", + ")\n", + "\n", + "db.add_documents(documents=documents, ids=uuids)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ - "docs_with_score = db.max_marginal_relevance_search_with_score(query)" + "# query = \"What did the president say about Ketanji Brown Jackson\"\n", + "# docs_with_score = db.similarity_search_with_score(query)" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "--------------------------------------------------------------------------------\n", - "Score: 0.6077010035514832\n", - "Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n", - "\n", - "Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n", - "\n", - "One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n", - "\n", - "And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.\n", - "--------------------------------------------------------------------------------\n", - "--------------------------------------------------------------------------------\n", - "Score: 0.6852865219116211\n", - "It is going to transform America and put us on a path to win the economic competition of the 21st Century that we face with the rest of the world—particularly with China. \n", - "\n", - "As I’ve told Xi Jinping, it is never a good bet to bet against the American people. \n", - "\n", - "We’ll create good jobs for millions of Americans, modernizing roads, airports, ports, and waterways all across America. \n", - "\n", - "And we’ll do it all to withstand the devastating effects of the climate crisis and promote environmental justice. \n", - "\n", - "We’ll build a national network of 500,000 electric vehicle charging stations, begin to replace poisonous lead pipes—so every child—and every American—has clean water to drink at home and at school, provide affordable high-speed internet for every American—urban, suburban, rural, and tribal communities. \n", - "\n", - "4,000 projects have already been announced. \n", - "\n", - "And tonight, I’m announcing that this year we will start fixing over 65,000 miles of highway and 1,500 bridges in disrepair.\n", - "--------------------------------------------------------------------------------\n", - "--------------------------------------------------------------------------------\n", - "Score: 0.6866700053215027\n", - "We can’t change how divided we’ve been. But we can change how we move forward—on COVID-19 and other issues we must face together. \n", - "\n", - "I recently visited the New York City Police Department days after the funerals of Officer Wilbert Mora and his partner, Officer Jason Rivera. \n", - "\n", - "They were responding to a 9-1-1 call when a man shot and killed them with a stolen gun. \n", - "\n", - "Officer Mora was 27 years old. \n", "\n", - "Officer Rivera was 22. \n", + "Similarity Search\n", + "* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]\n", + "* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]\n", "\n", - "Both Dominican Americans who’d grown up on the same streets they later chose to patrol as police officers. \n", - "\n", - "I spoke with their families and told them that we are forever in debt for their sacrifice, and we will carry on their mission to restore the trust and safety every community deserves. \n", - "\n", - "I’ve worked on these issues a long time. \n", - "\n", - "I know what works: Investing in crime prevention and community police officers who’ll walk the beat, who’ll know the neighborhood, and who can restore trust and safety.\n", - "--------------------------------------------------------------------------------\n", - "--------------------------------------------------------------------------------\n", - "Score: 0.6936529278755188\n", - "But cancer from prolonged exposure to burn pits ravaged Heath’s lungs and body. \n", - "\n", - "Danielle says Heath was a fighter to the very end. \n", - "\n", - "He didn’t know how to stop fighting, and neither did she. \n", - "\n", - "Through her pain she found purpose to demand we do better. \n", - "\n", - "Tonight, Danielle—we are. \n", - "\n", - "The VA is pioneering new ways of linking toxic exposures to diseases, already helping more veterans get benefits. \n", - "\n", - "And tonight, I’m announcing we’re expanding eligibility to veterans suffering from nine respiratory cancers. \n", - "\n", - "I’m also calling on Congress: pass a law to make sure veterans devastated by toxic exposures in Iraq and Afghanistan finally get the benefits and comprehensive health care they deserve. \n", - "\n", - "And fourth, let’s end cancer as we know it. \n", - "\n", - "This is personal to me and Jill, to Kamala, and to so many of you. \n", - "\n", - "Cancer is the #2 cause of death in America–second only to heart disease.\n", - "--------------------------------------------------------------------------------\n" + "Similarity search with score\n", + "* [SIM=0.945397] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]\n" ] } ], "source": [ - "for doc, score in docs_with_score:\n", - " print(\"-\" * 80)\n", - " print(\"Score: \", score)\n", - " print(doc.page_content)\n", - " print(\"-\" * 80)" + "print()\n", + "print(\"Similarity Search\")\n", + "results = db.similarity_search(\n", + " \"LangChain provides abstractions to make working with LLMs easy\",\n", + " k=2,\n", + " filter={\"source\": \"tweet\"},\n", + ")\n", + "for res in results:\n", + " print(f\"* {res.page_content} [{res.metadata}]\")\n", + " \n", + "print()\n", + "print(\"Similarity search with score\")\n", + "results = db.similarity_search_with_score(\n", + " \"Will it be hot tomorrow?\", k=1, filter={\"source\": \"news\"}\n", + ")\n", + "for res, score in results:\n", + " print(f\"* [SIM={score:3f}] {res.page_content} [{res.metadata}]\")" ] }, { @@ -381,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -402,16 +334,16 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['b94dc67c-ce7e-11ee-b8cb-b940b0e45762']" + "['68c4c679-c4d9-4f2d-bf01-f6c4f2181503']" ] }, - "execution_count": 35, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -422,7 +354,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -431,16 +363,16 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(Document(page_content='foo'), 0.0)" + "(Document(metadata={}, page_content='foo'), 0.0015394920483231544)" ] }, - "execution_count": 37, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -451,17 +383,17 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n", - " 0.6946534514427185)" + "(Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),\n", + " 1.2609431743621826)" ] }, - "execution_count": 38, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -481,12 +413,12 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "db = Kinetica.from_documents(\n", - " documents=docs,\n", + " documents=documents,\n", " embedding=embeddings,\n", " collection_name=COLLECTION_NAME,\n", " config=connection,\n", @@ -496,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -505,17 +437,17 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(Document(page_content='A former top litigator in private practice. A former federal public defender. And from a family of public school educators and police officers. A consensus builder. Since she’s been nominated, she’s received a broad range of support—from the Fraternal Order of Police to former judges appointed by Democrats and Republicans. \\n\\nAnd if we are to advance liberty and justice, we need to secure the Border and fix the immigration system. \\n\\nWe can do both. At our border, we’ve installed new technology like cutting-edge scanners to better detect drug smuggling. \\n\\nWe’ve set up joint patrols with Mexico and Guatemala to catch more human traffickers. \\n\\nWe’re putting in place dedicated immigration judges so families fleeing persecution and violence can have their cases heard faster. \\n\\nWe’re securing commitments and supporting partners in South and Central America to host more refugees and secure their own borders.', metadata={'source': '../../how_to/state_of_the_union.txt'}),\n", - " 0.6946534514427185)" + "(Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),\n", + " 1.260920763015747)" ] }, - "execution_count": 41, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -533,7 +465,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -542,14 +474,14 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "tags=['Kinetica', 'OpenAIEmbeddings'] vectorstore=\n" + "tags=['Kinetica', 'OpenAIEmbeddings'] vectorstore= search_kwargs={}\n" ] } ], @@ -574,7 +506,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/libs/community/langchain_community/vectorstores/kinetica.py b/libs/community/langchain_community/vectorstores/kinetica.py index b9f987219b356..1188e0ffa5d9b 100644 --- a/libs/community/langchain_community/vectorstores/kinetica.py +++ b/libs/community/langchain_community/vectorstores/kinetica.py @@ -12,10 +12,11 @@ from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type import numpy as np +from pydantic_settings import SettingsConfigDict, BaseSettings + from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.vectorstores import VectorStore -from pydantic_settings import BaseSettings, SettingsConfigDict from langchain_community.vectorstores.utils import maximal_marginal_relevance @@ -93,7 +94,7 @@ class Kinetica(VectorStore): To use, you should have the ``gpudb`` python package installed. Args: - kinetica_settings: Kinetica connection settings class. + config: Kinetica connection settings class. embedding_function: Any embedding function implementing `langchain.embeddings.base.Embeddings` interface. collection_name: The name of the collection to use. (default: langchain) @@ -170,7 +171,7 @@ def __post_init__(self, dimensions: int) -> None: except ImportError: raise ImportError( "Could not import Kinetica python API. " - "Please install it with `pip install gpudb==7.2.0.9`." + "Please install it with `pip install gpudb>=7.2.2.0`." ) self.dimensions = dimensions @@ -199,7 +200,7 @@ def __get_db(self, config: KineticaSettings) -> Any: except ImportError: raise ImportError( "Could not import Kinetica python API. " - "Please install it with `pip install gpudb==7.2.0.9`." + "Please install it with `pip install gpudb>=7.2.2.0`." ) options = GPUdb.Options() @@ -290,7 +291,7 @@ def create_tables_if_not_exists(self) -> Any: except ImportError: raise ImportError( "Could not import Kinetica python API. " - "Please install it with `pip install gpudb==7.2.0.9`." + "Please install it with `pip install gpudb>=7.2.2.0`." ) return GPUdbTable( _type=self.table_schema, @@ -428,7 +429,7 @@ def similarity_search_with_score_by_vector( k: int = 4, filter: Optional[dict] = None, ) -> List[Tuple[Document, float]]: - from gpudb import GPUdbException + # from gpudb import GPUdbException resp: Dict = self.__query_collection(embedding, k, filter) if resp and resp["status_info"]["status"] == "OK" and "records" in resp: @@ -436,9 +437,10 @@ def similarity_search_with_score_by_vector( results = list(zip(*list(records.values()))) return self._results_to_docs_and_scores(results) - else: - self.logger.error(resp["status_info"]["message"]) - raise GPUdbException(resp["status_info"]["message"]) + + self.logger.error(resp["status_info"]["message"]) + # raise GPUdbException(resp["status_info"]["message"]) + return [] def similarity_search_by_vector( self, @@ -464,16 +466,20 @@ def similarity_search_by_vector( def _results_to_docs_and_scores(self, results: Any) -> List[Tuple[Document, float]]: """Return docs and scores from results.""" - docs = [ - ( - Document( - page_content=result[0], - metadata=json.loads(result[1]), - ), - result[2] if self.embedding_function is not None else None, - ) - for result in results - ] + docs = ( + [ + ( + Document( + page_content=result[0], + metadata=json.loads(result[1]), + ), + result[2] if self.embedding_function is not None else None, + ) + for result in results + ] + if len(results) > 0 + else [] + ) return docs def _select_relevance_score_fn(self) -> Callable[[float], float]: