From e2e437da40775c8ea011ea309ee509616282626d Mon Sep 17 00:00:00 2001
From: Lance Martin
Date: Wed, 13 Dec 2023 14:57:49 -0800
Subject: [PATCH] fmt, add env vars

---
 .../README.md                                 | 18 +-
 .../ingest.py                                 |  9 +-
 .../rag_chroma_multi_modal_multi_vector.ipynb | 345 ------------------
 .../chain.py                                  |  9 +-
 4 files changed, 19 insertions(+), 362 deletions(-)

diff --git a/templates/rag-chroma-multi-modal-multi-vector/README.md b/templates/rag-chroma-multi-modal-multi-vector/README.md
index ea4772bbb246f..cd4486c941178 100644
--- a/templates/rag-chroma-multi-modal-multi-vector/README.md
+++ b/templates/rag-chroma-multi-modal-multi-vector/README.md
@@ -1,5 +1,5 @@
-# rag-chroma-multi-modal
+# rag-chroma-multi-modal-multi-vector

 Presentations (slide decks, etc) contain visual content that challenges conventional RAG.

@@ -7,24 +7,24 @@ Multi-modal LLMs unlock new ways to build apps over visual content like presenta

 This template performs multi-modal RAG using Chroma with the multi-vector retriever (see [blog](https://blog.langchain.dev/multi-modal-rag-template/)):

-* Extract the slides as images
-* Use GPT-4V to summarize each image
-* Embed the image summaries with a link to the original images
-* Retrieve relevant image based on similarity between the image summary and the user input
+* Extracts the slides as images
+* Uses GPT-4V to summarize each image
+* Embeds the image summaries with a link to the original images
+* Retrieves relevant images based on similarity between the image summary and the user input
 * Finally pass those images to GPT-4V for answer synthesis

 ## Storage

-We will use Upstash to store the images.
+We will use Upstash, which offers Redis with a REST API, to store the images.

 Simply login [here](https://upstash.com/) and create a database.

-This will give you:
+This will give you a REST API with:

 * UPSTASH_URL
 * UPSTASH_TOKEN

-Set these in chain.py (***TODO: Update this? Env var?***)
+Set `UPSTASH_URL` and `UPSTASH_TOKEN` as environment variables to access your database.

 We will use Chroma to store and index the image summaries, which will be created locally in the template directory.

@@ -47,6 +47,8 @@ The app will retrieve images using multi-modal embeddings, and pass them to GPT-

 Set the `OPENAI_API_KEY` environment variable to access the OpenAI GPT-4V.

+Set `UPSTASH_URL` and `UPSTASH_TOKEN` as environment variables to access your database.
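For reference, a minimal sketch of how these pieces fit together — it mirrors what `ingest.py` and `chain.py` in this patch already do, so it is illustrative only, not extra setup. Chroma indexes the image summaries, the Upstash byte store (configured from the environment variables above) holds the original images, and the multi-vector retriever links the two through `doc_id`:

```python
import os

from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import UpstashRedisByteStore
from langchain.vectorstores import Chroma

# Chroma collection holding the embedded image summaries (built locally by ingest.py)
vectorstore = Chroma(
    collection_name="image_summaries",
    persist_directory="chroma_db_multi_modal",
    embedding_function=OpenAIEmbeddings(),
)

# Upstash Redis byte store holding the raw base64 images, configured from the env vars above
store = UpstashRedisByteStore(
    url=os.getenv("UPSTASH_URL"),
    token=os.getenv("UPSTASH_TOKEN"),
)

# Summaries are what get searched; the linked original images are what get returned
retriever = MultiVectorRetriever(
    vectorstore=vectorstore,
    docstore=store,
    id_key="doc_id",
)
```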
+ ## Usage To use this package, you should first have the LangChain CLI installed: diff --git a/templates/rag-chroma-multi-modal-multi-vector/ingest.py b/templates/rag-chroma-multi-modal-multi-vector/ingest.py index ec67109347c88..c2a6358d68604 100644 --- a/templates/rag-chroma-multi-modal-multi-vector/ingest.py +++ b/templates/rag-chroma-multi-modal-multi-vector/ingest.py @@ -1,5 +1,6 @@ import base64 import io +import os import uuid from io import BytesIO from pathlib import Path @@ -64,8 +65,8 @@ def generate_img_summaries(img_base64_list): try: image_summaries.append(image_summarize(base64_image, prompt)) processed_images.append(base64_image) - except: - print(f"BadRequestError with image {i+1}") + except Exception as e: + print(f"Error with image {i+1}: {e}") return image_summaries, processed_images @@ -136,8 +137,8 @@ def create_multi_vector_retriever(vectorstore, image_summaries, images): """ # Initialize the storage layer for images - UPSTASH_URL = "https://usw1-bright-beagle-34178.upstash.io" - UPSTASH_TOKEN = "AYWCACQgNzk3OTJjZTItMGIxNy00MTEzLWIyZTAtZWI0ZmI1ZGY0NjFhNGRhMGZjNDE4YjgxNGE4MTkzOWYxMzllM2MzZThlOGY=" + UPSTASH_URL = os.getenv("UPSTASH_URL") + UPSTASH_TOKEN = os.getenv("UPSTASH_TOKEN") store = UpstashRedisByteStore(url=UPSTASH_URL, token=UPSTASH_TOKEN) id_key = "doc_id" diff --git a/templates/rag-chroma-multi-modal-multi-vector/rag_chroma_multi_modal_multi_vector.ipynb b/templates/rag-chroma-multi-modal-multi-vector/rag_chroma_multi_modal_multi_vector.ipynb index d93ae9a0d29de..bfaa9d82725cb 100644 --- a/templates/rag-chroma-multi-modal-multi-vector/rag_chroma_multi_modal_multi_vector.ipynb +++ b/templates/rag-chroma-multi-modal-multi-vector/rag_chroma_multi_modal_multi_vector.ipynb @@ -26,351 +26,6 @@ "rag_app = RemoteRunnable(\"http://localhost:8001/rag-chroma-multi-modal-multi-vector\")\n", "rag_app.invoke(\"What is the projected TAM for observability expected for each year through 2026?\")" ] - }, - { - "cell_type": "markdown", - "id": "98a94c02-1f0e-4e38-a1df-572d95913e01", - "metadata": {}, - "source": [ - "## TMP (TODO: Remove)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "e0eb4640-7f44-4c97-942c-54927640d954", - "metadata": {}, - "outputs": [], - "source": [ - "import base64\n", - "import io\n", - "import json\n", - "from pathlib import Path\n", - "\n", - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.pydantic_v1 import BaseModel\n", - "from langchain.retrievers.multi_vector import MultiVectorRetriever\n", - "from langchain.schema.messages import HumanMessage\n", - "from langchain.schema.output_parser import StrOutputParser\n", - "from langchain.schema.runnable import RunnableLambda, RunnablePassthrough\n", - "from langchain.storage import UpstashRedisByteStore\n", - "from langchain.vectorstores import Chroma\n", - "from PIL import Image\n", - "\n", - "\n", - "def resize_base64_image(base64_string, size=(128, 128)):\n", - " \"\"\"\n", - " Resize an image encoded as a Base64 string.\n", - "\n", - " :param base64_string: A Base64 encoded string of the image to be resized.\n", - " :param size: A tuple representing the new size (width, height) for the image.\n", - " :return: A Base64 encoded string of the resized image.\n", - " \"\"\"\n", - " img_data = base64.b64decode(base64_string)\n", - " img = Image.open(io.BytesIO(img_data))\n", - " resized_img = img.resize(size, Image.LANCZOS)\n", - " buffered = io.BytesIO()\n", - " resized_img.save(buffered, 
format=img.format)\n", - " return base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n", - "\n", - "\n", - "def get_resized_images(docs):\n", - " \"\"\"\n", - " Resize images from base64-encoded strings.\n", - "\n", - " :param docs: A list of base64-encoded image to be resized.\n", - " :return: Dict containing a list of resized base64-encoded strings.\n", - " \"\"\"\n", - " b64_images = []\n", - " for doc in docs:\n", - " doc = json.loads(doc.decode(\"utf-8\"))[\"kwargs\"][\"page_content\"]\n", - " resized_image = resize_base64_image(doc, size=(1280, 720))\n", - " b64_images.append(resized_image)\n", - " return {\"images\": b64_images}\n", - "\n", - "\n", - "def img_prompt_func(data_dict, num_images=2):\n", - " \"\"\"\n", - " GPT-4V prompt for image analysis.\n", - "\n", - " :param data_dict: A dict with images and a user-provided question.\n", - " :param num_images: Number of images to include in the prompt.\n", - " :return: A list containing message objects for each image and the text prompt.\n", - " \"\"\"\n", - " messages = []\n", - " if data_dict[\"context\"][\"images\"]:\n", - " for image in data_dict[\"context\"][\"images\"][:num_images]:\n", - " image_message = {\n", - " \"type\": \"image_url\",\n", - " \"image_url\": {\"url\": f\"data:image/jpeg;base64,{image}\"},\n", - " }\n", - " messages.append(image_message)\n", - " text_message = {\n", - " \"type\": \"text\",\n", - " \"text\": (\n", - " \"You are an analyst tasked with answering questions about visual content.\\n\"\n", - " \"You will be give a set of image(s) from a slide deck / presentation.\\n\"\n", - " \"Use this information to answer the user question. \\n\"\n", - " f\"User-provided question: {data_dict['question']}\\n\\n\"\n", - " ),\n", - " }\n", - " messages.append(text_message)\n", - " return [HumanMessage(content=messages)]\n", - "\n", - "\n", - "def multi_modal_rag_chain(retriever):\n", - " \"\"\"\n", - " Multi-modal RAG chain,\n", - "\n", - " :param retriever: A function that retrieves the necessary context for the model.\n", - " :return: A chain of functions representing the multi-modal RAG process.\n", - " \"\"\"\n", - " # Initialize the multi-modal Large Language Model with specific parameters\n", - " model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n", - "\n", - " # Define the RAG pipeline\n", - " chain = (\n", - " {\n", - " \"context\": retriever | RunnableLambda(get_resized_images),\n", - " \"question\": RunnablePassthrough(),\n", - " }\n", - " | RunnableLambda(img_prompt_func)\n", - " | model\n", - " | StrOutputParser()\n", - " )\n", - "\n", - " return chain\n", - "\n", - "\n", - "# Load chroma\n", - "vectorstore_mvr = Chroma(\n", - " collection_name=\"image_summaries\",\n", - " persist_directory=\"chroma_db_multi_modal\",\n", - " embedding_function=OpenAIEmbeddings(),\n", - ")\n", - "\n", - "# Load redis\n", - "UPSTASH_URL = \"https://usw1-bright-beagle-34178.upstash.io\"\n", - "UPSTASH_TOKEN = \"AYWCACQgNzk3OTJjZTItMGIxNy00MTEzLWIyZTAtZWI0ZmI1ZGY0NjFhNGRhMGZjNDE4YjgxNGE4MTkzOWYxMzllM2MzZThlOGY=\"\n", - "store = UpstashRedisByteStore(url=UPSTASH_URL,\n", - " token=UPSTASH_TOKEN)\n", - "id_key = \"doc_id\"\n", - "\n", - "# Create the multi-vector retriever\n", - "retriever = MultiVectorRetriever(\n", - " vectorstore=vectorstore_mvr,\n", - " docstore=store,\n", - " id_key=id_key,\n", - ")\n", - "\n", - "# Create RAG chain\n", - "chain = multi_modal_rag_chain(retriever)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": 
"57af60d7-fc78-406f-9cdd-fc64ea7798f9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'The total number of customers for Datadog, as shown in the image, is approximately 26,800.'" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "chain.invoke(\"What is the total numbner of customers for DataDog?\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8549b439-949e-4adf-bb52-3db7bf3f4c70", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1a98689a-3bab-4535-bd13-05782ff5aea3", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "f9a58eb6-fcd5-4f2b-ae86-891ebf6735c9", - "metadata": {}, - "outputs": [], - "source": [ - "import base64\n", - "import io\n", - "from pathlib import Path\n", - "\n", - "from langchain.retrievers.multi_vector import MultiVectorRetriever\n", - "from langchain.chat_models import ChatOpenAI\n", - "from langchain.pydantic_v1 import BaseModel\n", - "from langchain.schema.document import Document\n", - "from langchain.schema.messages import HumanMessage\n", - "from langchain.schema.output_parser import StrOutputParser\n", - "from langchain.schema.runnable import RunnableLambda, RunnablePassthrough\n", - "from langchain.vectorstores import Chroma\n", - "from langchain.embeddings import OpenAIEmbeddings\n", - "from langchain.storage import UpstashRedisByteStore\n", - "from PIL import Image\n", - "\n", - "\n", - "def resize_base64_image(base64_string, size=(128, 128)):\n", - " \"\"\"\n", - " Resize an image encoded as a Base64 string.\n", - "\n", - " :param base64_string: A Base64 encoded string of the image to be resized.\n", - " :param size: A tuple representing the new size (width, height) for the image.\n", - " :return: A Base64 encoded string of the resized image.\n", - " \"\"\"\n", - " img_data = base64.b64decode(base64_string)\n", - " img = Image.open(io.BytesIO(img_data))\n", - " resized_img = img.resize(size, Image.LANCZOS)\n", - " buffered = io.BytesIO()\n", - " resized_img.save(buffered, format=img.format)\n", - " return base64.b64encode(buffered.getvalue()).decode(\"utf-8\")\n", - "\n", - "\n", - "def get_resized_images(docs):\n", - " \"\"\"\n", - " Resize images from base64-encoded strings.\n", - "\n", - " :param docs: A list of base64-encoded image to be resized.\n", - " :return: Dict containing a list of resized base64-encoded strings.\n", - " \"\"\"\n", - " b64_images = []\n", - " for doc in docs:\n", - " # Convert from bytes and get b64 str from the Document JSON\n", - " doc = json.loads(doc.decode('utf-8'))['kwargs']['page_content']\n", - " resized_image = resize_base64_image(doc, size=(1280, 720))\n", - " b64_images.append(resized_image)\n", - " return {\"images\": b64_images}\n", - "\n", - "\n", - "def img_prompt_func(data_dict, num_images=2):\n", - " \"\"\"\n", - " GPT-4V prompt for image analysis.\n", - "\n", - " :param data_dict: A dict with images and a user-provided question.\n", - " :param num_images: Number of images to include in the prompt.\n", - " :return: A list containing message objects for each image and the text prompt.\n", - " \"\"\"\n", - " messages = []\n", - " if data_dict[\"context\"][\"images\"]:\n", - " for image in data_dict[\"context\"][\"images\"][:num_images]:\n", - " image_message = {\n", - " \"type\": \"image_url\",\n", - " \"image_url\": {\"url\": f\"data:image/jpeg;base64,{image}\"},\n", - " 
}\n", - " messages.append(image_message)\n", - " text_message = {\n", - " \"type\": \"text\",\n", - " \"text\": (\n", - " \"You are an analyst tasked with answering questions about visual content.\\n\"\n", - " \"You will be give a set of image(s) from a slide deck / presentation.\\n\"\n", - " \"Use this information to answer the user question. \\n\"\n", - " f\"User-provided question: {data_dict['question']}\\n\\n\"\n", - " ),\n", - " }\n", - " messages.append(text_message)\n", - " return [HumanMessage(content=messages)]\n", - "\n", - "\n", - "def multi_modal_rag_chain(retriever):\n", - " \"\"\"\n", - " Multi-modal RAG chain,\n", - "\n", - " :param retriever: A function that retrieves the necessary context for the model.\n", - " :return: A chain of functions representing the multi-modal RAG process.\n", - " \"\"\"\n", - " # Initialize the multi-modal Large Language Model with specific parameters\n", - " model = ChatOpenAI(temperature=0, model=\"gpt-4-vision-preview\", max_tokens=1024)\n", - "\n", - " # Define the RAG pipeline\n", - " chain = (\n", - " {\n", - " \"context\": retriever | RunnableLambda(get_resized_images),\n", - " \"question\": RunnablePassthrough(),\n", - " }\n", - " | RunnableLambda(img_prompt_func)\n", - " | model\n", - " | StrOutputParser()\n", - " )\n", - "\n", - " return chain\n", - "\n", - "\n", - "# Load chroma\n", - "vectorstore_mvr = Chroma(\n", - " collection_name=\"image_summaries\",\n", - " persist_directory=\"chroma_db_multi_modal\",\n", - " embedding_function=OpenAIEmbeddings()\n", - ")\n", - "\n", - "# Load redis\n", - "UPSTASH_URL = \"https://usw1-bright-beagle-34178.upstash.io\"\n", - "UPSTASH_TOKEN = \"AYWCACQgNzk3OTJjZTItMGIxNy00MTEzLWIyZTAtZWI0ZmI1ZGY0NjFhNGRhMGZjNDE4YjgxNGE4MTkzOWYxMzllM2MzZThlOGY=\"\n", - "store = UpstashRedisByteStore(url=UPSTASH_URL,\n", - " token=UPSTASH_TOKEN)\n", - "\n", - "id_key = \"doc_id\"\n", - "\n", - "# Create the multi-vector retriever\n", - "retriever = MultiVectorRetriever(\n", - " vectorstore=vectorstore_mvr,\n", - " docstore=store,\n", - " id_key=id_key,\n", - ")\n", - "\n", - "# Create RAG chain\n", - "chain = multi_modal_rag_chain(retriever)" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "dbe8cf0c-91c6-4bb8-8514-342199260559", - "metadata": {}, - "outputs": [], - "source": [ - "q = \"How many total customers does Datadog have?\"\n", - "docs = retriever.get_relevant_documents(q)" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "71d008e0-9629-4967-9063-dce31f8b5412", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'Datadog has approximately 26,800 total customers.'" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "chain.invoke(q)" - ] } ], "metadata": { diff --git a/templates/rag-chroma-multi-modal-multi-vector/rag_chroma_multi_modal_multi_vector/chain.py b/templates/rag-chroma-multi-modal-multi-vector/rag_chroma_multi_modal_multi_vector/chain.py index 59baf8df51f86..648486cbfb98d 100644 --- a/templates/rag-chroma-multi-modal-multi-vector/rag_chroma_multi_modal_multi_vector/chain.py +++ b/templates/rag-chroma-multi-modal-multi-vector/rag_chroma_multi_modal_multi_vector/chain.py @@ -1,17 +1,17 @@ import base64 import io -import json +import os from pathlib import Path from langchain.chat_models import ChatOpenAI from langchain.embeddings import OpenAIEmbeddings from langchain.pydantic_v1 import BaseModel from langchain.retrievers.multi_vector import MultiVectorRetriever +from 
langchain.schema.document import Document from langchain.schema.messages import HumanMessage from langchain.schema.output_parser import StrOutputParser from langchain.schema.runnable import RunnableLambda, RunnablePassthrough from langchain.storage import UpstashRedisByteStore -from langchain.schema.document import Document from langchain.vectorstores import Chroma from PIL import Image @@ -43,7 +43,6 @@ def get_resized_images(docs): for doc in docs: if isinstance(doc, Document): doc = doc.page_content - # doc = json.loads(doc.decode("utf-8"))["kwargs"]["page_content"] resized_image = resize_base64_image(doc, size=(1280, 720)) b64_images.append(resized_image) return {"images": b64_images} @@ -110,8 +109,8 @@ def multi_modal_rag_chain(retriever): ) # Load redis -UPSTASH_URL = "https://usw1-bright-beagle-34178.upstash.io" -UPSTASH_TOKEN = "AYWCACQgNzk3OTJjZTItMGIxNy00MTEzLWIyZTAtZWI0ZmI1ZGY0NjFhNGRhMGZjNDE4YjgxNGE4MTkzOWYxMzllM2MzZThlOGY=" +UPSTASH_URL = os.getenv("UPSTASH_URL") +UPSTASH_TOKEN = os.getenv("UPSTASH_TOKEN") store = UpstashRedisByteStore(url=UPSTASH_URL, token=UPSTASH_TOKEN) id_key = "doc_id"
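A possible hardening that is not part of this patch: because `chain.py` now reads the Upstash credentials from the environment, a short guard could fail fast with a clear message when they are missing, rather than surfacing a low-level connection error at query time. A sketch, assuming the same variable names introduced above:

```python
import os

# Hypothetical guard; the variable names match the ones introduced in this patch
UPSTASH_URL = os.getenv("UPSTASH_URL")
UPSTASH_TOKEN = os.getenv("UPSTASH_TOKEN")

if not UPSTASH_URL or not UPSTASH_TOKEN:
    raise ValueError(
        "UPSTASH_URL and UPSTASH_TOKEN must be set as environment variables "
        "to reach the Upstash Redis byte store."
    )
```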