From b2132b8a6c2861dbc39724f10f3acb78189a83bf Mon Sep 17 00:00:00 2001 From: Yuki Sekiya Date: Mon, 15 May 2023 08:56:26 +0900 Subject: [PATCH 01/22] Modify parameters to SageMaker Endpoint --- samples/kendra_chat_flan_xxl.py | 2 +- samples/kendra_retriever_flan_xxl.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/kendra_chat_flan_xxl.py b/samples/kendra_chat_flan_xxl.py index 685e94a..e21b989 100644 --- a/samples/kendra_chat_flan_xxl.py +++ b/samples/kendra_chat_flan_xxl.py @@ -30,7 +30,7 @@ class ContentHandler(ContentHandlerBase): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) + input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: diff --git a/samples/kendra_retriever_flan_xxl.py b/samples/kendra_retriever_flan_xxl.py index 75b2fad..8c1b899 100644 --- a/samples/kendra_retriever_flan_xxl.py +++ b/samples/kendra_retriever_flan_xxl.py @@ -18,7 +18,7 @@ class ContentHandler(ContentHandlerBase): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) + input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: From 0a5716d0c55ddad3ae4fd90855987a35402f75a2 Mon Sep 17 00:00:00 2001 From: Yuki Sekiya Date: Mon, 15 May 2023 08:57:20 +0900 Subject: [PATCH 02/22] Modify key from SageMaker Endpoint to get generated data --- samples/kendra_chat_flan_xxl.py | 2 +- samples/kendra_retriever_flan_xxl.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/kendra_chat_flan_xxl.py b/samples/kendra_chat_flan_xxl.py index e21b989..f828ba1 100644 --- a/samples/kendra_chat_flan_xxl.py +++ b/samples/kendra_chat_flan_xxl.py @@ -35,7 +35,7 @@ def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: def transform_output(self, output: bytes) -> str: response_json = json.loads(output.read().decode("utf-8")) - return response_json[0]["generated_text"] + return response_json["generated_texts"][0] content_handler = ContentHandler() diff --git a/samples/kendra_retriever_flan_xxl.py b/samples/kendra_retriever_flan_xxl.py index 8c1b899..d34d272 100644 --- a/samples/kendra_retriever_flan_xxl.py +++ b/samples/kendra_retriever_flan_xxl.py @@ -23,7 +23,7 @@ def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: def transform_output(self, output: bytes) -> str: response_json = json.loads(output.read().decode("utf-8")) - return response_json[0]["generated_text"] + return response_json["generated_texts"][0] content_handler = ContentHandler() From 7798ea29d4dd779961f53af95d2086c879be049e Mon Sep 17 00:00:00 2001 From: jgalego Date: Tue, 16 May 2023 00:42:13 +0100 Subject: [PATCH 03/22] Added nb-compatible FLAN app and nb demo --- samples/genai-kendra-langchain.ipynb | 1143 ++++++++++++++++++++++++++ samples/kendra_chat_flan_xl_nb.py | 145 ++++ samples/skip_kernel_extension.py | 21 + 3 files changed, 1309 insertions(+) create mode 100644 samples/genai-kendra-langchain.ipynb create mode 100644 samples/kendra_chat_flan_xl_nb.py create mode 100644 samples/skip_kernel_extension.py diff --git a/samples/genai-kendra-langchain.ipynb b/samples/genai-kendra-langchain.ipynb new file mode 100644 index 0000000..022825c --- /dev/null +++ 
b/samples/genai-kendra-langchain.ipynb @@ -0,0 +1,1143 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "76653ab1-e168-45c7-8c45-2208e37f71d0", + "metadata": {}, + "source": [ + "## [GenAI applications on enterprise data with Amazon Kendra, LangChain and LLMs](https://aws.amazon.com/blogs/machine-learning/quickly-build-high-accuracy-generative-ai-applications-on-enterprise-data-using-amazon-kendra-langchain-and-large-language-models/)\n", + "\n", + "In this tutorial, we will demonstrate how to implement [Retrieval Augmented Generation](https://arxiv.org/abs/2005.11401) (RAG) workflows with [Amazon Kendra](https://aws.amazon.com/kendra/), [πŸ¦œοΈπŸ”— LangChain](https://python.langchain.com/en/latest/index.html) and state-of-the-art [Large Language Models](https://docs.cohere.com/docs/introduction-to-large-language-models) (LLM) to provide a conversational experience backed by data.\n", + "\n", + "> Visit the [Generative AI on AWS](https://aws.amazon.com/generative-ai/) landing page for the latest news on generative AI (GenAI) and learn how AWS is helping reinvent customer experiences and applications" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "bb0f79b1-1124-43f7-a659-a6d1c249fa32", + "metadata": {}, + "source": [ + "### Architecture\n", + "\n", + "The diagram below shows the architecture of a GenAI application with a RAG approach:\n", + "\n", + "\n", + "\n", + "We use the [Amazon Kendra index](https://docs.aws.amazon.com/kendra/latest/dg/hiw-index.html) to hold large quantities of unstructured data from multiple [data sources](https://docs.aws.amazon.com/kendra/latest/dg/hiw-data-source.html), including:\n", + "\n", + "* Wiki pages\n", + "* [MS SharePoint sites](https://docs.aws.amazon.com/kendra/latest/dg/data-source-sharepoint.html)\n", + "* Document repositories like [Amazon S3](https://docs.aws.amazon.com/kendra/latest/dg/data-source-s3.html)\n", + "* ... *and much, much more!*\n", + "\n", + "Each time a user interacts with the GenAI app, the following will happen:\n", + "\n", + "1. The user makes a request to the GenAI app\n", + "2. The app issues a [search query](https://docs.aws.amazon.com/kendra/latest/dg/searching-example.html) to the Amazon Kendra index based on the user request\n", + "3. The index returns search results with excerpts of relevant documents from the ingested data\n", + "4. The app sends the user request along with the data retrieved from the index as context in the LLM prompt\n", + "5. The LLM returns a succinct response to the user request based on the retrieved data\n", + "6. 
The response from the LLM is sent back to the user" + ] + }, + { + "cell_type": "markdown", + "id": "7fceeb54-28ff-446e-9e66-2eb8c6d8464f", + "metadata": {}, + "source": [ + "### Prerequisites\n", + "\n", + "> **Note:** Tested with [Amazon SageMaker Studio](https://docs.aws.amazon.com/sagemaker/latest/dg/studio.html) on a `ml.t3.medium` (2 vCPU + 4 GiB) instance with the [Base Python 3.0 [`sagemaker-base-python-310`]](https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-available-images.html) image" + ] + }, + { + "cell_type": "markdown", + "id": "c22974f6-b724-4f28-be12-51eb8fad2344", + "metadata": {}, + "source": [ + "For this demo, we will need a Python version compatible with [πŸ¦œοΈπŸ”— LangChain](https://pypi.org/project/langchain/) (`>=3.8.1, <4.0`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "093092c2-be80-4233-ba8e-6e8b6c9bd7d4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import sys\n", + "!{sys.executable} -V" + ] + }, + { + "cell_type": "markdown", + "id": "28631311-8d7a-4ce4-a453-e00e14cda932", + "metadata": {}, + "source": [ + "**Optional:** we will also need the [AWS CLI](https://aws.amazon.com/cli/) (`v2`) to create the Kendra index\n", + "\n", + "> For more information on how to upgrade the AWS CLI, see [Installing or updating the latest version of the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)\n", + "\n", + "> When running this notebook through Amazon SageMaker, make sure the [execution role](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) has enough permissions to run the commands" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "488dda35-9873-4b2a-b476-0fa2bcf696e8", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "!aws --version" + ] + }, + { + "cell_type": "markdown", + "id": "bcab366a-8d84-4c85-97b1-878ac574edae", + "metadata": {}, + "source": [ + "and a recent version of the [SageMaker Python SDK](https://sagemaker.readthedocs.io/en/stable/) (`>=2.154.0`), containing the [SageMaker JumpStart SDK](https://github.com/aws/sagemaker-python-sdk/releases/tag/v2.154.0), to deploy the LLM to a SageMaker Endpoint." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5210f2bc-b3c4-4789-954a-8b7e5e3e3bf6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Set pip options\n", + "%env PIP_DISABLE_PIP_VERSION_CHECK True\n", + "%env PIP_ROOT_USER_ACTION ignore\n", + "\n", + "# Install/update SageMaker Python SDK\n", + "!{sys.executable} -m pip install -qU \"sagemaker>=2.154.0\"\n", + "!python -c \"import sagemaker; print(sagemaker.__version__)\"" + ] + }, + { + "cell_type": "markdown", + "id": "ac1bf9d7-4f2f-4591-9208-1cf091daa8cc", + "metadata": {}, + "source": [ + "The variables below can be used to bypass **Optional** steps." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7b404ed-d4de-4133-aca9-1ae01828db0f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%load_ext skip_kernel_extension\n", + "\n", + "# Whether to skip the Kendra index deployment\n", + "SKIP_KENDRA_DEPLOYMENT = False\n", + "\n", + "# Stack name for the Kendra index deployment\n", + "KENDRA_STACK_NAME = \"genai-kendra-langchain\"\n", + "\n", + "# Whether to skip the quota increase request\n", + "SKIP_QUOTA_INCREASE = True\n", + "\n", + "# Whether Streamlit should be installed\n", + "SKIP_STREAMLIT_INSTALL = False" + ] + }, + { + "cell_type": "markdown", + "id": "193d8512-cd1c-4f74-81fc-4706aaa3a495", + "metadata": {}, + "source": [ + "### Implement a RAG Workflow" + ] + }, + { + "cell_type": "markdown", + "id": "b193c61d-1d51-40fb-bc22-fbdcd22d0c50", + "metadata": {}, + "source": [ + "The [AWS LangChain](https://github.com/aws-samples/amazon-kendra-langchain-extensions) repository contains a set of utility classes to work with LangChain, which includes a retriever class (`KendraIndexRetriever`) for working with a Kendra index and sample scripts to execute the Q&A chain for SageMaker, Open AI and Anthropic providers." + ] + }, + { + "cell_type": "markdown", + "id": "0e61f630-2685-4c51-9955-f344e1b47cdd", + "metadata": {}, + "source": [ + "**Optional:** deploy the provided AWS CloudFormation template ([`samples/kendra-docs-index.yaml`](https://github.com/aws-samples/amazon-kendra-langchain-extensions/blob/main/samples/kendra-docs-index.yaml)) to create a new Kendra index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ade29089-ea81-4e04-be14-5e4aad4f030b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%skip $SKIP_KENDRA_DEPLOYMENT\n", + "!aws cloudformation deploy --stack-name $KENDRA_STACK_NAME --template-file \"kendra-docs-index.yaml\" --capabilities CAPABILITY_NAMED_IAM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec4abc48-f25b-4805-bd5e-904ef231f358", + "metadata": {}, + "outputs": [], + "source": [ + "%%skip $SKIP_KENDRA_DEPLOYMENT\n", + "!aws cloudformation describe-stacks --stack-name $KENDRA_STACK_NAME --query 'Stacks[0].Outputs[?OutputKey==`KendraIndexID`].OutputValue' --output text" + ] + }, + { + "cell_type": "markdown", + "id": "59b7ee8a-b1d1-4747-a90a-5b2b3c1d8dbe", + "metadata": {}, + "source": [ + "**Optional:** consider requesting a quota increase via [AWS Service Quotas](https://docs.aws.amazon.com/general/latest/gr/aws_service_limits.html) on the size of the document excerpts returned by Amazon Kendra for a better experience" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "230aaa4e-2875-41c7-a56f-fc1db5b3e9ac", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%skip $SKIP_QUOTA_INCREASE\n", + "# Request a quota increase for the maximum number of characters displayed in the Document Excerpt of a Document type result in the Query API\n", + "# https://docs.aws.amazon.com/kendra/latest/APIReference/API_Query.html\n", + "!aws service-quotas request-service-quota-increase --service-code kendra --quota-code \"L-196E775D\" --desired-value 1000" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "e570923b-efcc-4e3f-ab88-3afab8f17b79", + "metadata": { + "tags": [] + }, + "source": [ + "**Optional:** Install Streamlit\n", + "\n", + "> [Streamlit](https://streamlit.io/) is an open source framework for building and sharing data apps. 
\n", + ">\n", + "> πŸ’‘ For a quick demo, try out the [Knowledge base > Tutorials](https://docs.streamlit.io/knowledge-base/tutorials)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35987475-d40a-4720-8e32-096bc8286047", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%skip $SKIP_STREAMLIT_INSTALL\n", + "\n", + "# Install streamlit\n", + "# https://docs.streamlit.io/library/get-started/installation\n", + "!{sys.executable} -m pip install -qU streamlit\n", + "\n", + "# Debug installation\n", + "# https://docs.streamlit.io/knowledge-base/using-streamlit/sanity-checks\n", + "!streamlit version" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "3f882417-9345-4483-a7fd-e945f319b152", + "metadata": { + "tags": [] + }, + "source": [ + "Install πŸ¦œοΈπŸ”— LangChain\n", + "\n", + "> [LangChain](https://github.com/hwchase17/langchain) is an open-source framework for building *agentic* and *data-aware* applications powered by language models.\n", + ">\n", + "> πŸ’‘ For a quick intro, check out [Getting Started with LangChain: A Beginner’s Guide to Building LLM-Powered Applications](https://towardsdatascience.com/getting-started-with-langchain-a-beginners-guide-to-building-llm-powered-applications-95fc8898732c)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62a3aea9-5632-442e-8a41-441dd3fa7b7c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Install LangChain\n", + "# https://python.langchain.com/en/latest/getting_started/getting_started.html\n", + "!{sys.executable} -m pip install -qU \"langchain==0.0.137\"\n", + "\n", + "# Debug installation\n", + "!python -c \"import langchain; print(langchain.__version__)\"" + ] + }, + { + "cell_type": "markdown", + "id": "07479ad8-f3c9-4510-8f86-7567bd6f6251", + "metadata": {}, + "source": [ + "Now we need an LLM to handle user queries. \n", + "\n", + "Models like [Flan-T5-XL](https://huggingface.co/google/flan-t5-xl) and [Flan-T5-XXL](https://huggingface.co/google/flan-t5-xxl), which are available on [Hugging Face Transformers](https://huggingface.co/docs/transformers/model_doc/flan-t5), can be deployed via [Amazon SageMaker JumpStart](https://aws.amazon.com/sagemaker/jumpstart/) in a matter of minutes with just a few lines of code.\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1064441d-6db4-43a5-a518-a187a08740c6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from sagemaker.jumpstart.model import JumpStartModel\n", + "\n", + "# Select model\n", + "# https://aws.amazon.com/sagemaker/jumpstart/getting-started\n", + "model_id = str(input(\"Model ID:\") or \"huggingface-text2text-flan-t5-xl\")\n", + "\n", + "# Deploy model\n", + "model = JumpStartModel(model_id=model_id)\n", + "predictor = model.deploy()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9492a301-7299-46ca-a27f-08cf0bba3e59", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Test model\n", + "predictor.predict(\"Hey there! How are you?\")" + ] + }, + { + "cell_type": "markdown", + "id": "73ef0c04-10e9-41d9-8a20-993f02f91901", + "metadata": {}, + "source": [ + "**Optional:** if you want to work with [Anthropic's `Claude-V1`](https://www.anthropic.com/index/introducing-claude) or [OpenAI's `da-vinci-003`](da-vinci-003), get the corresponding API key(s) and run the cell below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18d958fa-4ed9-4e1d-a1cf-c8ba04b9b830", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "\"\"\"\n", + "OpenAI\n", + "https://python.langchain.com/en/latest/modules/models/llms/integrations/openai.html\n", + "\"\"\"\n", + "\n", + "# Get an API key from\n", + "# https://platform.openai.com/account/api-keys\n", + "OPENAI_API_KEY = getpass(\"OPENAI_API_KEY:\")\n", + "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY\n", + "\n", + "\"\"\"\n", + "Anthropic\n", + "https://python.langchain.com/en/latest/modules/models/chat/integrations/anthropic.html\n", + "\"\"\"\n", + "\n", + "# Get an API key from\n", + "# https://www.anthropic.com/product\n", + "ANTHROPIC_API_KEY = getpass(\"ANTHROPIC_API_KEY:\")\n", + "os.environ[\"ANTHROPIC_API_KEY\"] = ANTHROPIC_API_KEY" + ] + }, + { + "cell_type": "markdown", + "id": "0631867b-cfba-457e-a5bd-f09ba60f969f", + "metadata": {}, + "source": [ + "Install the `KendraIndexRetriever` interface and sample applications" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a46de18-8599-4300-a9d6-b88f19c316c3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Install classes\n", + "!{sys.executable} -m pip install -qU .." + ] + }, + { + "cell_type": "markdown", + "id": "db5825a3-9fe0-4ca6-a1b3-bc4ed39305a7", + "metadata": {}, + "source": [ + "Before running the sample application, we need to set up the environment variables with the Amazon Kendra index details (`KENDRA_INDEX_ID`) and the SageMaker Endpoints for the `FLAN-T5-*` models (`FLAN_*_ENDPOINT`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3fc6cc1c-a0ce-417c-a92f-bd1344132025", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import re\n", + "\n", + "# Set Kendra index ID\n", + "os.environ['KENDRA_INDEX_ID'] = input('KENDRA_INDEX_ID:')\n", + "\n", + "# Set endpoint name\n", + "# https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart-foundation-models/text2text-generation-flan-t5.ipynb\n", + "if re.search(\"flan-t5-xl\", model_id):\n", + " os.environ['FLAN_XL_ENDPOINT'] = predictor.endpoint_name\n", + "elif re.search(\"flan-t5-xxl\", model_id):\n", + " os.environ['FLAN_XXL_ENDPOINT'] = predictor.endpoint_name\n", + "elif \"OPENAI_API_KEY\" in os.environ or \"ANTHROPIC_API_KEY\" in os.environ:\n", + " print(\"Using external API key\")\n", + "else:\n", + " print(\"⚠️ The SageMaker Endpoint environment variable is not set!\")" + ] + }, + { + "cell_type": "markdown", + "id": "64a8fdc8-dd0c-4a9d-bb5c-b812221313e5", + "metadata": { + "tags": [] + }, + "source": [ + "Finally, let's start the application 😊" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b30fb0e2-f2af-4f5a-b8a0-d0d706b5d984", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Python\n", + "%env FLAN_XL_ENDPOINT hf-text2text-flan-t5-xl-2023-05-15-18-36-03-147\n", + "%run kendra_chat_flan_xl_nb.py \"What is Amazon Lex?\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb063d0f-515e-4f0d-97b2-95c65ca1ea01", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Streamlit\n", + "!streamlit run app.py flanxl" + ] + }, + { + "cell_type": "markdown", + "id": "8f3000db-84b3-46fe-b6bc-a354ed8dcd18", + "metadata": {}, + "source": [ + "> **Note:** As of May 2023, Amazon SageMaker Studio doesn't allow 
apps to run through Jupyter Server Proxy on a Kernel Gateway. The best option is to use the [SageMaker SSH Helper](https://github.com/aws-samples/sagemaker-ssh-helper) library to do port forwarding to `server.port` (defaults to `8501`) cf. [Local IDE integration with SageMaker Studio over SSH for PyCharm / VSCode](https://github.com/aws-samples/sagemaker-ssh-helper#local-ide-integration-with-sagemaker-studio-over-ssh-for-pycharm--vscode) for more information." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "82a59dbc-55ba-4a15-b0f1-3e2d764d7fc5", + "metadata": {}, + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "5299dc29-fa23-407e-aba0-aea056979246", + "metadata": {}, + "source": [ + "### Cleanup\n", + "\n", + "Don't forget to delete the SageMaker Endpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e2233be-e5e9-4c63-a694-605bf08bf46c", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "predictor.delete_endpoint()" + ] + }, + { + "cell_type": "markdown", + "id": "e47d328a-e236-4b6d-8462-59a38316f347", + "metadata": {}, + "source": [ + "and the Kendra index" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f842c617-b74e-46b1-b7c7-79f2397657da", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%skip $SKIP_KENDRA_DEPLOYMENT\n", + "!aws cloudformation delete-stack --stack-name $KENDRA_STACK_NAME" + ] + }, + { + "cell_type": "markdown", + "id": "d4feb917-1844-42bf-ba63-1f090173b389", + "metadata": {}, + "source": [ + "### References πŸ“š\n", + "\n", + "* AWS ML Blog: [Quickly build high-accuracy Generative AI applications on enterprise data using Amazon Kendra, LangChain, and large language models](https://aws.amazon.com/blogs/machine-learning/quickly-build-high-accuracy-generative-ai-applications-on-enterprise-data-using-amazon-kendra-langchain-and-large-language-models/)\n", + "* AWS ML Blog: [Question answering using Retrieval Augmented Generation with foundation models in Amazon SageMaker JumpStart](https://aws.amazon.com/blogs/machine-learning/question-answering-using-retrieval-augmented-generation-with-foundation-models-in-amazon-sagemaker-jumpstart/)\n", + "* AWS ML Blog: [Dive deep into Amazon SageMaker Studio Notebooks architecture](https://aws.amazon.com/blogs/machine-learning/dive-deep-into-amazon-sagemaker-studio-notebook-architecture/)" + ] + } + ], + "metadata": { + "availableInstances": [ + { + "_defaultOrder": 0, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.t3.medium", + "vcpuNum": 2 + }, + { + "_defaultOrder": 1, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.t3.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 2, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.t3.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 3, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.t3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 4, + "_isFastLaunch": true, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 5, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + 
"hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 6, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 7, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 8, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 9, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 10, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 11, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 12, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.m5d.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 13, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.m5d.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 14, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.m5d.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 15, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.m5d.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 16, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.m5d.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 17, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.m5d.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 18, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.m5d.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 19, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.m5d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 20, + "_isFastLaunch": false, + "category": "General purpose", + "gpuNum": 0, + "hideHardwareSpecs": true, + "memoryGiB": 0, + "name": "ml.geospatial.interactive", + "supportedImageNames": [ + "sagemaker-geospatial-v1-0" + ], + "vcpuNum": 0 + }, + { + "_defaultOrder": 21, + "_isFastLaunch": true, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 4, + "name": "ml.c5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 22, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 8, + "name": "ml.c5.xlarge", + "vcpuNum": 4 + }, + { + 
"_defaultOrder": 23, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.c5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 24, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.c5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 25, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 72, + "name": "ml.c5.9xlarge", + "vcpuNum": 36 + }, + { + "_defaultOrder": 26, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 96, + "name": "ml.c5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 27, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 144, + "name": "ml.c5.18xlarge", + "vcpuNum": 72 + }, + { + "_defaultOrder": 28, + "_isFastLaunch": false, + "category": "Compute optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.c5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 29, + "_isFastLaunch": true, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g4dn.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 30, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g4dn.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 31, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g4dn.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 32, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g4dn.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 33, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g4dn.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 34, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g4dn.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 35, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 61, + "name": "ml.p3.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 36, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 244, + "name": "ml.p3.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 37, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 488, + "name": "ml.p3.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 38, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.p3dn.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 39, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.r5.large", + "vcpuNum": 2 + }, + { + "_defaultOrder": 40, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, 
+ "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.r5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 41, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.r5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 42, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.r5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 43, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.r5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 44, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.r5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 45, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 512, + "name": "ml.r5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 46, + "_isFastLaunch": false, + "category": "Memory Optimized", + "gpuNum": 0, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.r5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 47, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 16, + "name": "ml.g5.xlarge", + "vcpuNum": 4 + }, + { + "_defaultOrder": 48, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 32, + "name": "ml.g5.2xlarge", + "vcpuNum": 8 + }, + { + "_defaultOrder": 49, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 64, + "name": "ml.g5.4xlarge", + "vcpuNum": 16 + }, + { + "_defaultOrder": 50, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 128, + "name": "ml.g5.8xlarge", + "vcpuNum": 32 + }, + { + "_defaultOrder": 51, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 1, + "hideHardwareSpecs": false, + "memoryGiB": 256, + "name": "ml.g5.16xlarge", + "vcpuNum": 64 + }, + { + "_defaultOrder": 52, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 192, + "name": "ml.g5.12xlarge", + "vcpuNum": 48 + }, + { + "_defaultOrder": 53, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 4, + "hideHardwareSpecs": false, + "memoryGiB": 384, + "name": "ml.g5.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 54, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 768, + "name": "ml.g5.48xlarge", + "vcpuNum": 192 + }, + { + "_defaultOrder": 55, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4d.24xlarge", + "vcpuNum": 96 + }, + { + "_defaultOrder": 56, + "_isFastLaunch": false, + "category": "Accelerated computing", + "gpuNum": 8, + "hideHardwareSpecs": false, + "memoryGiB": 1152, + "name": "ml.p4de.24xlarge", + "vcpuNum": 96 + } + ], + "instance_type": "ml.t3.medium", + "kernelspec": { + "display_name": "Python 3 (Base Python 3.0)", + "language": "python", + "name": 
"python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-base-python-310-v1" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/samples/kendra_chat_flan_xl_nb.py b/samples/kendra_chat_flan_xl_nb.py new file mode 100644 index 0000000..ff57489 --- /dev/null +++ b/samples/kendra_chat_flan_xl_nb.py @@ -0,0 +1,145 @@ +# pylint: disable=invalid-name,line-too-long +""" +Adapted from +https://github.com/aws-samples/amazon-kendra-langchain-extensions/blob/main/samples/kendra_chat_flan_xl.py +""" + +import json +import os + +from langchain.chains import ConversationalRetrievalChain +from langchain.prompts import PromptTemplate +from langchain import SagemakerEndpoint +from langchain.llms.sagemaker_endpoint import ContentHandlerBase + +from aws_langchain.kendra_index_retriever import KendraIndexRetriever + +class bcolors: #pylint: disable=too-few-public-methods + """ + ANSI escape sequences + https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal + """ + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +MAX_HISTORY_LENGTH = 5 + +def build_chain(): + """ + Builds the LangChain chain + """ + region = os.environ["AWS_REGION"] + kendra_index_id = os.environ["KENDRA_INDEX_ID"] + endpoint_name = os.environ["FLAN_XL_ENDPOINT"] + + class ContentHandler(ContentHandlerBase): + """ + Handler class to transform input and ouput + into a format that the SageMaker Endpoint can understand + """ + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: + input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + return response_json["generated_texts"][0] + + content_handler = ContentHandler() + + # Initialize LLM hosted on a SageMaker endpoint + # https://python.langchain.com/en/latest/modules/models/llms/integrations/sagemaker.html + llm=SagemakerEndpoint( + endpoint_name=endpoint_name, + region_name="us-east-1", + model_kwargs={"temperature":1e-10, "max_length": 500}, + content_handler=content_handler + ) + + # Initialize Kendra index retriever + retriever = KendraIndexRetriever( + kendraindex=kendra_index_id, + awsregion=region, + return_source_documents=True + ) + + # Define prompt template + # https://python.langchain.com/en/latest/modules/prompts/prompt_templates.html + prompt_template = """ +The following is a friendly conversation between a human and an AI. +The AI is talkative and provides lots of specific details from its context. +If the AI does not know the answer to a question, it truthfully says it +does not know. +{context} +Instruction: Based on the above documents, provide a detailed answer for, +{question} Answer "don't know" if not present in the document. 
Solution: +""" + qa_prompt = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] + ) + + # Initialize QA chain with chat history + # https://python.langchain.com/en/latest/modules/chains/index_examples/chat_vector_db.html + qa = ConversationalRetrievalChain.from_llm( # + llm=llm, + retriever=retriever, + qa_prompt=qa_prompt, + return_source_documents=True + ) + + return qa + +def run_chain(chain, prompt: str, history=None): + """ + Runs the Q&A chain given a user prompt and chat history + """ + if history is None: + history = [] + return chain({"question": prompt, "chat_history": history}) + +def prompt_user(): + """ + Helper function to get user input + """ + print(f"{bcolors.OKBLUE}Hello! How can I help you?{bcolors.ENDC}") + print(f"{bcolors.OKCYAN}Ask a question, start a New search: or Stop cell execution to exit.{bcolors.ENDC}") + return input(">") + +if __name__ == "__main__": + # Initialize chat history + chat_history = [] + + # Initialize Q&A chain + qa_chain = build_chain() + + try: + while query := prompt_user(): + # Process user input in case of a new search + if query.strip().lower().startswith("new search:"): + query = query.strip().lower().replace("new search:", "") + chat_history = [] + if len(chat_history) == MAX_HISTORY_LENGTH: + chat_history.pop(0) + + # Show answer and keep a record + result = run_chain(qa_chain, query, chat_history) + chat_history.append((query, result["answer"])) + print(f"{bcolors.OKGREEN}{result['answer']}{bcolors.ENDC}") + + # Show sources + if 'source_documents' in result: + print(bcolors.OKGREEN + 'Sources:') + for doc in result['source_documents']: + print(f"+ {doc.metadata['source']}") + except KeyboardInterrupt: + pass diff --git a/samples/skip_kernel_extension.py b/samples/skip_kernel_extension.py new file mode 100644 index 0000000..14f7496 --- /dev/null +++ b/samples/skip_kernel_extension.py @@ -0,0 +1,21 @@ +""" +Custom kernel extension to skill cell execution + +Adapted from +https://stackoverflow.com/questions/26494747/simple-way-to-choose-which-cells-to-run-in-ipython-notebook-during-run-all +""" + +def skip(line, cell=None): + '''Skips execution of the current line/cell if line evaluates to True.''' + if eval(line): + return + + get_ipython().run_cell(cell) + +def load_ipython_extension(shell): + '''Registers the skip magic when the extension loads.''' + shell.register_magic_function(skip, 'line_cell') + +def unload_ipython_extension(shell): + '''Unregisters the skip magic when the extension unloads.''' + del shell.magics_manager.magics['cell']['skip'] \ No newline at end of file From 03c7fc15524044d4dad6f6cb24213843105f05d5 Mon Sep 17 00:00:00 2001 From: jgalego Date: Tue, 16 May 2023 09:32:50 +0100 Subject: [PATCH 04/22] Fixed console app command --- samples/genai-kendra-langchain.ipynb | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samples/genai-kendra-langchain.ipynb b/samples/genai-kendra-langchain.ipynb index 022825c..a4edafc 100644 --- a/samples/genai-kendra-langchain.ipynb +++ b/samples/genai-kendra-langchain.ipynb @@ -453,8 +453,7 @@ "outputs": [], "source": [ "# Python\n", - "%env FLAN_XL_ENDPOINT hf-text2text-flan-t5-xl-2023-05-15-18-36-03-147\n", - "%run kendra_chat_flan_xl_nb.py \"What is Amazon Lex?\"" + "%run kendra_chat_flan_xl_nb.py" ] }, { From fff2618ad80959598e0dbd332e8eee8a62cb9226 Mon Sep 17 00:00:00 2001 From: jgalego Date: Tue, 16 May 2023 09:38:53 +0100 Subject: [PATCH 05/22] Updated skip kernel extension docstring --- samples/skip_kernel_extension.py | 3 ++- 1 
file changed, 2 insertions(+), 1 deletion(-) diff --git a/samples/skip_kernel_extension.py b/samples/skip_kernel_extension.py index 14f7496..b688f3a 100644 --- a/samples/skip_kernel_extension.py +++ b/samples/skip_kernel_extension.py @@ -1,7 +1,8 @@ """ -Custom kernel extension to skill cell execution +Custom kernel extension to add %%skip magic and control cell execution Adapted from +https://github.com/ipython/ipython/issues/11582 https://stackoverflow.com/questions/26494747/simple-way-to-choose-which-cells-to-run-in-ipython-notebook-during-run-all """ From c31445baaaef617ed1a1e309f1df189997f6459e Mon Sep 17 00:00:00 2001 From: Yuki Sekiya Date: Fri, 9 Jun 2023 14:44:43 +0900 Subject: [PATCH 06/22] change hardcoded value in flant to arg --- samples/kendra_chat_flan_xl.py | 2 +- samples/kendra_chat_flan_xxl.py | 2 +- samples/kendra_retriever_flan_xl.py | 2 +- samples/kendra_retriever_flan_xxl.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/samples/kendra_chat_flan_xl.py b/samples/kendra_chat_flan_xl.py index df305f3..3b25623 100644 --- a/samples/kendra_chat_flan_xl.py +++ b/samples/kendra_chat_flan_xl.py @@ -41,7 +41,7 @@ def transform_output(self, output: bytes) -> str: llm=SagemakerEndpoint( endpoint_name=endpoint_name, - region_name="us-east-1", + region_name=region, model_kwargs={"temperature":1e-10, "max_length": 500}, content_handler=content_handler ) diff --git a/samples/kendra_chat_flan_xxl.py b/samples/kendra_chat_flan_xxl.py index f828ba1..86627bc 100644 --- a/samples/kendra_chat_flan_xxl.py +++ b/samples/kendra_chat_flan_xxl.py @@ -41,7 +41,7 @@ def transform_output(self, output: bytes) -> str: llm=SagemakerEndpoint( endpoint_name=endpoint_name, - region_name="us-east-1", + region_name=region, model_kwargs={"temperature":1e-10, "max_length": 500}, content_handler=content_handler ) diff --git a/samples/kendra_retriever_flan_xl.py b/samples/kendra_retriever_flan_xl.py index 089cd7d..d2fd7fd 100644 --- a/samples/kendra_retriever_flan_xl.py +++ b/samples/kendra_retriever_flan_xl.py @@ -29,7 +29,7 @@ def transform_output(self, output: bytes) -> str: llm=SagemakerEndpoint( endpoint_name=endpoint_name, - region_name="us-east-1", + region_name=region, model_kwargs={"temperature":1e-10, "max_length": 500}, content_handler=content_handler ) diff --git a/samples/kendra_retriever_flan_xxl.py b/samples/kendra_retriever_flan_xxl.py index d34d272..3be1cdf 100644 --- a/samples/kendra_retriever_flan_xxl.py +++ b/samples/kendra_retriever_flan_xxl.py @@ -29,7 +29,7 @@ def transform_output(self, output: bytes) -> str: llm=SagemakerEndpoint( endpoint_name=endpoint_name, - region_name="us-east-1", + region_name=region, model_kwargs={"temperature":1e-10, "max_length": 500}, content_handler=content_handler ) From 0da27c0679a15324c785210a1c478e0ff2976396 Mon Sep 17 00:00:00 2001 From: Shing Lyu Date: Wed, 5 Jul 2023 14:29:36 +0000 Subject: [PATCH 07/22] Fix: fix FLAN-XXL input/output format --- kendra_retriever_samples/kendra_chat_flan_xxl.py | 5 +++-- kendra_retriever_samples/kendra_retriever_flan_xxl.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/kendra_retriever_samples/kendra_chat_flan_xxl.py b/kendra_retriever_samples/kendra_chat_flan_xxl.py index dd5d237..aff9a9e 100644 --- a/kendra_retriever_samples/kendra_chat_flan_xxl.py +++ b/kendra_retriever_samples/kendra_chat_flan_xxl.py @@ -30,12 +30,13 @@ class ContentHandler(LLMContentHandler): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str 
= json.dumps({"inputs": prompt, "parameters": model_kwargs}) + input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: response_json = json.loads(output.read().decode("utf-8")) - return response_json[0]["generated_text"] + print(response_json) + return response_json["generated_texts"][0] content_handler = ContentHandler() diff --git a/kendra_retriever_samples/kendra_retriever_flan_xxl.py b/kendra_retriever_samples/kendra_retriever_flan_xxl.py index e10693c..8d61f8b 100644 --- a/kendra_retriever_samples/kendra_retriever_flan_xxl.py +++ b/kendra_retriever_samples/kendra_retriever_flan_xxl.py @@ -18,12 +18,12 @@ class ContentHandler(LLMContentHandler): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) + input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: response_json = json.loads(output.read().decode("utf-8")) - return response_json[0]["generated_text"] + return response_json["generated_texts"][0] content_handler = ContentHandler() From b184dff1164209c0b6fc9576c54b0f1f90a99b00 Mon Sep 17 00:00:00 2001 From: hako884 Date: Tue, 18 Jul 2023 09:16:32 +0900 Subject: [PATCH 08/22] Fix: kendra_retriever_samples/kendra_chat_open_ai.py --- kendra_retriever_samples/kendra_chat_open_ai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kendra_retriever_samples/kendra_chat_open_ai.py b/kendra_retriever_samples/kendra_chat_open_ai.py index 9615ee9..9df2d31 100644 --- a/kendra_retriever_samples/kendra_chat_open_ai.py +++ b/kendra_retriever_samples/kendra_chat_open_ai.py @@ -13,7 +13,7 @@ def build_chain(): llm = OpenAI(batch_size=5, temperature=0, max_tokens=300) - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + retriever = AmazonKendraRetriever(index_id=kendra_index_id, region_name=region) prompt_template = """ The following is a friendly conversation between a human and an AI. 
From 435876edc9c4645c1f59a138de735340403633e6 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Wed, 19 Jul 2023 02:54:52 +0000 Subject: [PATCH 09/22] Added Llama 2 integration --- .gitignore | 5 +- kendra_retriever_samples/app.py | 8 +- .../kendra_chat_llama_2.py | 116 ++++++++++++++++++ 3 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 kendra_retriever_samples/kendra_chat_llama_2.py diff --git a/.gitignore b/.gitignore index 350c1fb..c71f6b7 100644 --- a/.gitignore +++ b/.gitignore @@ -82,4 +82,7 @@ dmypy.json .DS_Store # vs code -.vscode \ No newline at end of file +.vscode + +# venv files +env* \ No newline at end of file diff --git a/kendra_retriever_samples/app.py b/kendra_retriever_samples/app.py index 5ba1e29..20d10f1 100644 --- a/kendra_retriever_samples/app.py +++ b/kendra_retriever_samples/app.py @@ -6,7 +6,7 @@ import kendra_chat_flan_xl as flanxl import kendra_chat_flan_xxl as flanxxl import kendra_chat_open_ai as openai - +import kendra_chat_llama_2 as llama2 USER_ICON = "images/user-icon.png" AI_ICON = "images/ai-icon.png" @@ -15,7 +15,8 @@ 'openai': 'Open AI', 'anthropic': 'Anthropic', 'flanxl': 'Flan XL', - 'flanxxl': 'Flan XXL' + 'flanxxl': 'Flan XXL', + 'llama2' : 'Llama 2' } # Check if the user ID is already stored in the session state @@ -42,6 +43,9 @@ elif (sys.argv[1] == 'openai'): st.session_state['llm_app'] = openai st.session_state['llm_chain'] = openai.build_chain() + elif (sys.argv[1] == 'llama2'): + st.session_state['llm_app'] = llama2 + st.session_state['llm_chain'] = llama2.build_chain() else: raise Exception("Unsupported LLM: ", sys.argv[1]) else: diff --git a/kendra_retriever_samples/kendra_chat_llama_2.py b/kendra_retriever_samples/kendra_chat_llama_2.py new file mode 100644 index 0000000..c802a2a --- /dev/null +++ b/kendra_retriever_samples/kendra_chat_llama_2.py @@ -0,0 +1,116 @@ +from langchain.retrievers import AmazonKendraRetriever +from langchain.chains import ConversationalRetrievalChain +from langchain.prompts import PromptTemplate +from langchain import SagemakerEndpoint +from langchain.llms.sagemaker_endpoint import LLMContentHandler +import sys +import json +import os + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +MAX_HISTORY_LENGTH = 5 + +def build_chain(): + region = os.environ["AWS_REGION"] + kendra_index_id = os.environ["KENDRA_INDEX_ID"] + endpoint_name = os.environ["LLAMA_2_ENDPOINT"] + + class ContentHandler(LLMContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: + input_str = json.dumps({"inputs": + [[ + #{"role": "system", "content": ""}, + {"role": "user", "content": prompt}, + ]], + **model_kwargs + }) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + + return response_json[0]['generation']['content'] + + content_handler = ContentHandler() + + llm=SagemakerEndpoint( + endpoint_name=endpoint_name, + region_name=region, + model_kwargs={"max_new_tokens": 1000, "top_p": 0.9,"temperature":0.6}, + endpoint_kwargs={"CustomAttributes":"accept_eula=true"}, + content_handler=content_handler, + ) + + retriever = AmazonKendraRetriever(index_id=kendra_index_id) + + prompt_template = """ + The following is a friendly conversation between a human and an AI. 
+ The AI is talkative and provides lots of specific details from its context. + If the AI does not know the answer to a question, it truthfully says it + does not know. + {context} + Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. + Solution:""" + PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"], + ) + + condense_qa_template = """ + Given the following conversation and a follow up question, rephrase the follow up question + to be a standalone question. + + Chat History: + {chat_history} + Follow Up Input: {question} + Standalone question:""" + standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) + + qa = ConversationalRetrievalChain.from_llm( + llm=llm, + retriever=retriever, + condense_question_prompt=standalone_question_prompt, + return_source_documents=True, + combine_docs_chain_kwargs={"prompt":PROMPT}, + ) + return qa + +def run_chain(chain, prompt: str, history=[]): + return chain({"question": prompt, "chat_history": history}) + +if __name__ == "__main__": + chat_history = [] + qa = build_chain() + print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC) + print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) + print(">", end=" ", flush=True) + for query in sys.stdin: + if (query.strip().lower().startswith("new search:")): + query = query.strip().lower().replace("new search:","") + chat_history = [] + elif (len(chat_history) == MAX_HISTORY_LENGTH): + chat_history.pop(0) + result = run_chain(qa, query, chat_history) + chat_history.append((query, result["answer"])) + print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC) + if 'source_documents' in result: + print(bcolors.OKGREEN + 'Sources:') + for d in result['source_documents']: + print(d.metadata['source']) + print(bcolors.ENDC) + print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." 
+ bcolors.ENDC) + print(">", end=" ", flush=True) + print(bcolors.OKBLUE + "Bye" + bcolors.ENDC) From 7421cbf99d9674bca1b388b892bdf1b6f15b4fff Mon Sep 17 00:00:00 2001 From: jgalego Date: Thu, 20 Jul 2023 12:41:54 +0100 Subject: [PATCH 10/22] Moved files to kendra_retriever_samples --- .../genai-kendra-langchain.ipynb | 0 {samples => kendra_retriever_samples}/kendra_chat_flan_xl_nb.py | 0 {samples => kendra_retriever_samples}/skip_kernel_extension.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {samples => kendra_retriever_samples}/genai-kendra-langchain.ipynb (100%) rename {samples => kendra_retriever_samples}/kendra_chat_flan_xl_nb.py (100%) rename {samples => kendra_retriever_samples}/skip_kernel_extension.py (100%) diff --git a/samples/genai-kendra-langchain.ipynb b/kendra_retriever_samples/genai-kendra-langchain.ipynb similarity index 100% rename from samples/genai-kendra-langchain.ipynb rename to kendra_retriever_samples/genai-kendra-langchain.ipynb diff --git a/samples/kendra_chat_flan_xl_nb.py b/kendra_retriever_samples/kendra_chat_flan_xl_nb.py similarity index 100% rename from samples/kendra_chat_flan_xl_nb.py rename to kendra_retriever_samples/kendra_chat_flan_xl_nb.py diff --git a/samples/skip_kernel_extension.py b/kendra_retriever_samples/skip_kernel_extension.py similarity index 100% rename from samples/skip_kernel_extension.py rename to kendra_retriever_samples/skip_kernel_extension.py From c125661f0b6e222091be48ef8eba3d1633d5172c Mon Sep 17 00:00:00 2001 From: jgalego Date: Thu, 20 Jul 2023 12:49:21 +0100 Subject: [PATCH 11/22] Fixed SM JumpStart image URL --- kendra_retriever_samples/genai-kendra-langchain.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kendra_retriever_samples/genai-kendra-langchain.ipynb b/kendra_retriever_samples/genai-kendra-langchain.ipynb index a4edafc..ca07b90 100644 --- a/kendra_retriever_samples/genai-kendra-langchain.ipynb +++ b/kendra_retriever_samples/genai-kendra-langchain.ipynb @@ -301,7 +301,7 @@ "\n", "Models like [Flan-T5-XL](https://huggingface.co/google/flan-t5-xl) and [Flan-T5-XXL](https://huggingface.co/google/flan-t5-xxl), which are available on [Hugging Face Transformers](https://huggingface.co/docs/transformers/model_doc/flan-t5), can be deployed via [Amazon SageMaker JumpStart](https://aws.amazon.com/sagemaker/jumpstart/) in a matter of minutes with just a few lines of code.\n", "\n", - "" + "" ] }, { From cc3aa7d05f322dc2bce4fbff99370e2d0a0a00df Mon Sep 17 00:00:00 2001 From: jgalego Date: Thu, 20 Jul 2023 13:06:40 +0100 Subject: [PATCH 12/22] Refactored to install streamlit+langchain from requirements.txt --- kendra_retriever_samples/genai-kendra-langchain.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kendra_retriever_samples/genai-kendra-langchain.ipynb b/kendra_retriever_samples/genai-kendra-langchain.ipynb index ca07b90..80e4442 100644 --- a/kendra_retriever_samples/genai-kendra-langchain.ipynb +++ b/kendra_retriever_samples/genai-kendra-langchain.ipynb @@ -253,7 +253,7 @@ "\n", "# Install streamlit\n", "# https://docs.streamlit.io/library/get-started/installation\n", - "!{sys.executable} -m pip install -qU streamlit\n", + "!{sys.executable} -m pip install -qU $(grep streamlit requirements.txt)\n", "\n", "# Debug installation\n", "# https://docs.streamlit.io/knowledge-base/using-streamlit/sanity-checks\n", @@ -286,7 +286,7 @@ "source": [ "# Install LangChain\n", "# https://python.langchain.com/en/latest/getting_started/getting_started.html\n", - 
"!{sys.executable} -m pip install -qU \"langchain==0.0.137\"\n", + "!{sys.executable} -m pip install -qU $(grep langchain requirements.txt)\n", "\n", "# Debug installation\n", "!python -c \"import langchain; print(langchain.__version__)\"" From 749afbdd3a52ad33c28f5f02199c49f59ae96485 Mon Sep 17 00:00:00 2001 From: EC2 Default User Date: Wed, 26 Jul 2023 13:44:20 +0000 Subject: [PATCH 13/22] Added support for Falcon 40B Instruct BF16 and fixed a bug with FLAN-XXL --- kendra_retriever_samples/app.py | 7 +- .../kendra_chat_falcon_40b.py | 117 ++++++++++++++++++ .../kendra_chat_flan_xxl.py | 6 +- .../kendra_retriever_falcon_40b.py | 77 ++++++++++++ .../kendra_retriever_flan_xxl.py | 5 +- 5 files changed, 207 insertions(+), 5 deletions(-) create mode 100644 kendra_retriever_samples/kendra_chat_falcon_40b.py create mode 100644 kendra_retriever_samples/kendra_retriever_falcon_40b.py diff --git a/kendra_retriever_samples/app.py b/kendra_retriever_samples/app.py index 5ba1e29..6870c19 100644 --- a/kendra_retriever_samples/app.py +++ b/kendra_retriever_samples/app.py @@ -6,6 +6,7 @@ import kendra_chat_flan_xl as flanxl import kendra_chat_flan_xxl as flanxxl import kendra_chat_open_ai as openai +import kendra_chat_falcon_40b as falcon40b USER_ICON = "images/user-icon.png" @@ -15,7 +16,8 @@ 'openai': 'Open AI', 'anthropic': 'Anthropic', 'flanxl': 'Flan XL', - 'flanxxl': 'Flan XXL' + 'flanxxl': 'Flan XXL', + 'falcon40b': 'Falcon 40B' } # Check if the user ID is already stored in the session state @@ -42,6 +44,9 @@ elif (sys.argv[1] == 'openai'): st.session_state['llm_app'] = openai st.session_state['llm_chain'] = openai.build_chain() + elif (sys.argv[1] == 'falcon40b'): + st.session_state['llm_app'] = falcon40b + st.session_state['llm_chain'] = falcon40b.build_chain() else: raise Exception("Unsupported LLM: ", sys.argv[1]) else: diff --git a/kendra_retriever_samples/kendra_chat_falcon_40b.py b/kendra_retriever_samples/kendra_chat_falcon_40b.py new file mode 100644 index 0000000..d49ac50 --- /dev/null +++ b/kendra_retriever_samples/kendra_chat_falcon_40b.py @@ -0,0 +1,117 @@ +from langchain.retrievers import AmazonKendraRetriever +from langchain.chains import ConversationalRetrievalChain +from langchain import SagemakerEndpoint +from langchain.llms.sagemaker_endpoint import LLMContentHandler +from langchain.prompts import PromptTemplate +import sys +import json +import os + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +MAX_HISTORY_LENGTH = 5 + +def build_chain(): + region = os.environ["AWS_REGION"] + kendra_index_id = os.environ["KENDRA_INDEX_ID"] + endpoint_name = os.environ["FALCON_40B_ENDPOINT"] + + class ContentHandler(LLMContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: + input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) + print("input_str", input_str) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + print(response_json) + return response_json[0]["generated_text"] + + content_handler = ContentHandler() + + llm=SagemakerEndpoint( + endpoint_name=endpoint_name, + region_name=region, + model_kwargs={ + "temperature": 0.8, + "max_length": 10000, + "max_new_tokens": 512, + "do_sample": True, + "top_p": 0.9, + 
"repetition_penalty": 1.03, + "stop": ["\nUser:","<|endoftext|>",""] + }, + content_handler=content_handler + ) + + retriever = AmazonKendraRetriever(index_id=kendra_index_id) + + prompt_template = """ + The following is a friendly conversation between a human and an AI. + The AI is talkative and provides lots of specific details from its context. + If the AI does not know the answer to a question, it truthfully says it + does not know. + {context} + Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. + Solution:""" + PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] + ) + + condense_qa_template = """ + Given the following conversation and a follow up question, rephrase the follow up question + to be a standalone question. + + Chat History: + {chat_history} + Follow Up Input: {question} + Standalone question:""" + standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) + + qa = ConversationalRetrievalChain.from_llm( + llm=llm, + retriever=retriever, + condense_question_prompt=standalone_question_prompt, + return_source_documents=True, + combine_docs_chain_kwargs={"prompt":PROMPT}) + return qa + +def run_chain(chain, prompt: str, history=[]): + return chain({"question": prompt, "chat_history": history}) + +if __name__ == "__main__": + chat_history = [] + qa = build_chain() + print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC) + print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) + print(">", end=" ", flush=True) + for query in sys.stdin: + if (query.strip().lower().startswith("new search:")): + query = query.strip().lower().replace("new search:","") + chat_history = [] + elif (len(chat_history) == MAX_HISTORY_LENGTH): + chat_history.pop(0) + result = run_chain(qa, query, chat_history) + chat_history.append((query, result["answer"])) + print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC) + if 'source_documents' in result: + print(bcolors.OKGREEN + 'Sources:') + for d in result['source_documents']: + print(d.metadata['source']) + print(bcolors.ENDC) + print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." 
+ bcolors.ENDC) + print(">", end=" ", flush=True) + print(bcolors.OKBLUE + "Bye" + bcolors.ENDC) diff --git a/kendra_retriever_samples/kendra_chat_flan_xxl.py b/kendra_retriever_samples/kendra_chat_flan_xxl.py index dd5d237..d742c75 100644 --- a/kendra_retriever_samples/kendra_chat_flan_xxl.py +++ b/kendra_retriever_samples/kendra_chat_flan_xxl.py @@ -30,12 +30,14 @@ class ContentHandler(LLMContentHandler): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) + input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) + print("input_str", input_str) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: response_json = json.loads(output.read().decode("utf-8")) - return response_json[0]["generated_text"] + print(response_json) + return response_json["generated_texts"][0] content_handler = ContentHandler() diff --git a/kendra_retriever_samples/kendra_retriever_falcon_40b.py b/kendra_retriever_samples/kendra_retriever_falcon_40b.py new file mode 100644 index 0000000..bb10db9 --- /dev/null +++ b/kendra_retriever_samples/kendra_retriever_falcon_40b.py @@ -0,0 +1,77 @@ +from langchain.retrievers import AmazonKendraRetriever +from langchain.chains import RetrievalQA +from langchain import OpenAI +from langchain.prompts import PromptTemplate +from langchain import SagemakerEndpoint +from langchain.llms.sagemaker_endpoint import LLMContentHandler +import json +import os + + +def build_chain(): + region = os.environ["AWS_REGION"] + kendra_index_id = os.environ["KENDRA_INDEX_ID"] + endpoint_name = os.environ["FALCON_40B_ENDPOINT"] + + class ContentHandler(LLMContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: + input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) + return input_str.encode('utf-8') + + def transform_output(self, output: bytes) -> str: + response_json = json.loads(output.read().decode("utf-8")) + print(response_json) + return response_json[0]["generated_text"] + + content_handler = ContentHandler() + + llm=SagemakerEndpoint( + endpoint_name=endpoint_name, + region_name=region, + model_kwargs={"temperature":1e-10, "min_length": 10000, "max_length": 10000, "max_new_tokens": 100}, + content_handler=content_handler + ) + + retriever = AmazonKendraRetriever(index_id=kendra_index_id) + + prompt_template = """ + The following is a friendly conversation between a human and an AI. + The AI is talkative and provides lots of specific details from its context. + If the AI does not know the answer to a question, it truthfully says it + does not know. + {context} + Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. 
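The two ContentHandler variants above encode two different serving contracts, which is what the FLAN-XXL fix in this patch is about. A minimal offline sketch, assuming the JumpStart FLAN-T5 containers take a flat `text_inputs` payload and return `generated_texts`, while the Hugging Face LLM container behind Falcon takes `inputs` plus a nested `parameters` dict and returns a list of `generated_text` objects; the response byte strings are simulated so the sketch runs without an endpoint:

```python
import json

def flan_t5_request(prompt: str, **kwargs) -> bytes:
    # JumpStart FLAN-T5 contract (assumed): kwargs flattened next to "text_inputs"
    return json.dumps({"text_inputs": prompt, **kwargs}).encode("utf-8")

def falcon_request(prompt: str, **kwargs) -> bytes:
    # Hugging Face LLM container contract (assumed): nested "parameters" dict
    return json.dumps({"inputs": prompt, "parameters": kwargs}).encode("utf-8")

print(flan_t5_request("What is Amazon Kendra?", temperature=1e-10, max_length=500))
print(falcon_request("What is Amazon Kendra?", temperature=0.8, max_new_tokens=512))

# Matching response shapes, simulated here; the handlers above parse these
flan_t5_response = b'{"generated_texts": ["Amazon Kendra is a search service."]}'
falcon_response = b'[{"generated_text": "Amazon Kendra is a search service."}]'
assert json.loads(flan_t5_response)["generated_texts"][0]
assert json.loads(falcon_response)[0]["generated_text"]
```

Sending one shape to an endpoint deployed with the other container typically fails at parse time, which is why each sample carries its own handler.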
+ Solution:""" + PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] + ) + chain_type_kwargs = {"prompt": PROMPT} + qa = RetrievalQA.from_chain_type( + llm, + chain_type="stuff", + retriever=retriever, + chain_type_kwargs=chain_type_kwargs, + return_source_documents=True + ) + return qa + +def run_chain(chain, prompt: str, history=[]): + result = chain(prompt) + # To make it compatible with chat samples + return { + "answer": result['result'], + "source_documents": result['source_documents'] + } + +if __name__ == "__main__": + chain = build_chain() + result = run_chain(chain, "What's SageMaker?") + print(result['answer']) + if 'source_documents' in result: + print('Sources:') + for d in result['source_documents']: + print(d.metadata['source']) diff --git a/kendra_retriever_samples/kendra_retriever_flan_xxl.py b/kendra_retriever_samples/kendra_retriever_flan_xxl.py index e10693c..dd8498f 100644 --- a/kendra_retriever_samples/kendra_retriever_flan_xxl.py +++ b/kendra_retriever_samples/kendra_retriever_flan_xxl.py @@ -18,12 +18,13 @@ class ContentHandler(LLMContentHandler): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) + input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: response_json = json.loads(output.read().decode("utf-8")) - return response_json[0]["generated_text"] + print(response_json) + return response_json["generated_texts"][0] content_handler = ContentHandler() From c4e8219bccea30dae19b7cecc54a22ede9369ae8 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Mon, 26 Jun 2023 17:16:04 -0700 Subject: [PATCH 14/22] Updated samples to use kendra retriever from Langchain --- README.md | 138 +----------------- aws_langchain/.gitkeep | 0 aws_langchain/__init__.py | 1 - aws_langchain/kendra_index_retriever.py | 48 ------ aws_langchain/kendra_results.py | 33 ----- kendra_retriever_samples/README.md | 68 +++++++++ .../__init__.py | 0 {samples => kendra_retriever_samples}/app.py | 0 kendra_retriever_samples/environment.yml | 12 ++ .../images/ai-icon.png | Bin .../images/user-icon.png | Bin .../kendra-docs-index.yaml | 0 .../kendra_chat_anthropic.py | 52 ++++--- .../kendra_chat_flan_xl.py | 33 +++-- .../kendra_chat_flan_xxl.py | 51 ++++--- .../kendra_chat_open_ai.py | 32 ++-- .../kendra_retriever_anthropic.py | 62 ++++++++ .../kendra_retriever_flan_xl.py | 15 +- .../kendra_retriever_flan_xxl.py | 15 +- .../kendra_retriever_open_ai.py | 53 +++++++ kendra_retriever_samples/requirements.txt | 5 + pyproject.toml | 41 ------ samples/kendra_retriever_anthropic.py | 54 ------- samples/kendra_retriever_open_ai.py | 54 ------- 24 files changed, 328 insertions(+), 439 deletions(-) delete mode 100644 aws_langchain/.gitkeep delete mode 100644 aws_langchain/__init__.py delete mode 100644 aws_langchain/kendra_index_retriever.py delete mode 100644 aws_langchain/kendra_results.py create mode 100644 kendra_retriever_samples/README.md rename {samples => kendra_retriever_samples}/__init__.py (100%) rename {samples => kendra_retriever_samples}/app.py (100%) create mode 100644 kendra_retriever_samples/environment.yml rename {samples => kendra_retriever_samples}/images/ai-icon.png (100%) rename {samples => kendra_retriever_samples}/images/user-icon.png (100%) rename {samples => kendra_retriever_samples}/kendra-docs-index.yaml (100%) rename {samples => 
kendra_retriever_samples}/kendra_chat_anthropic.py (68%) rename {samples => kendra_retriever_samples}/kendra_chat_flan_xl.py (76%) rename {samples => kendra_retriever_samples}/kendra_chat_flan_xxl.py (71%) rename {samples => kendra_retriever_samples}/kendra_chat_open_ai.py (71%) create mode 100644 kendra_retriever_samples/kendra_retriever_anthropic.py rename {samples => kendra_retriever_samples}/kendra_retriever_flan_xl.py (84%) rename {samples => kendra_retriever_samples}/kendra_retriever_flan_xxl.py (84%) create mode 100644 kendra_retriever_samples/kendra_retriever_open_ai.py create mode 100644 kendra_retriever_samples/requirements.txt delete mode 100644 pyproject.toml delete mode 100644 samples/kendra_retriever_anthropic.py delete mode 100644 samples/kendra_retriever_open_ai.py diff --git a/README.md b/README.md index fafb557..4ef4a3a 100644 --- a/README.md +++ b/README.md @@ -1,135 +1,5 @@ -# AWS Langchain -This repo provides a set of utility classes to work with [Langchain](https://github.com/hwchase17/langchain/tree/master). It currently has a retriever class `KendraIndexRetriever` for working with a Kendra index and sample scripts to execute the QA chain for SageMaker, Open AI and Anthropic providers. - -## Installing - -Clone the repository -```bash -git clone https://github.com/aws-samples/amazon-kendra-langchain-extensions.git -``` - -Move to the repo dir -```bash -cd amazon-kendra-langchain-extensions -``` - - -Install the classes -```bash -pip install . -``` - -## Usage - -Usage with SageMaker Endpoint for Flan-T-XXL -```python -from aws_langchain.kendra_index_retriever import KendraIndexRetriever -from langchain.chains import RetrievalQA -from langchain import OpenAI -from langchain.prompts import PromptTemplate -from langchain import SagemakerEndpoint -from langchain.llms.sagemaker_endpoint import ContentHandlerBase -import json - -class ContentHandler(ContentHandlerBase): - content_type = "application/json" - accepts = "application/json" - - def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) - return input_str.encode('utf-8') - - def transform_output(self, output: bytes) -> str: - response_json = json.loads(output.read().decode("utf-8")) - return response_json[0]["generated_text"] - -content_handler = ContentHandler() -llm=SagemakerEndpoint( - endpoint_name=endpoint_name, - region_name="us-east-1", - model_kwargs={"temperature":1e-10, "max_length": 500}, - content_handler=content_handler - ) - -retriever = KendraIndexRetriever(kendraindex=kendra_index_id, - awsregion=region, - return_source_documents=True - ) - -prompt_template = """ -The following is a friendly conversation between a human and an AI. -The AI is talkative and provides lots of specific details from its context. -If the AI does not know the answer to a question, it truthfully says it -does not know. -{context} -Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. 
Solution: -""" -PROMPT = PromptTemplate( - template=prompt_template, input_variables=["context", "question"] -) -chain_type_kwargs = {"prompt": PROMPT} -qa = RetrievalQA.from_chain_type( - llm, - chain_type="stuff", - retriever=retriever, - chain_type_kwargs=chain_type_kwargs, - return_source_documents=True -) -result = qa("What's SageMaker?") -print(result['answer']) - -``` - -## Creating an Amazon Kendra index with test data -If you wish to create a sample Kendra index and index sample data and experiment with the index using the sample applications you can deploy the CloudFormation template samples/kendra-docs-index.yaml - - -## Running samples -For executing sample chains, install the optional dependencies -```bash -pip install ".[samples]" -``` - -Ensure that the environment variables are set for the aws region, kendra index id and the provider/model used by the sample. -For example, for running the `kendra_chat_flan_xl.py` sample, these environment variables must be set: AWS_REGION, KENDRA_INDEX_ID -and FLAN_XL_ENDPOINT. -You can use commands as below to set the environment variables. -```bash -export AWS_REGION="" -export KENDRA_INDEX_ID="" -export FLAN_XL_ENDPOINT="" -export FLAN_XXL_ENDPOINT="" -export OPENAI_API_KEY="" -export ANTHROPIC_API_KEY="" -``` - -### Running samples from the streamlit app -The samples directory is bundled with an `app.py` file that can be run as a web app using streamlit. -```bash -cd samples -streamlit run app.py anthropic -``` -The above command will run the `kendra_chat_anthropic` as the LLM chain. In order to run a different chain, pass a different provider, for example for running the `open_ai` chain run this command `streamlit run app.py openai`. - -### Running samples from the command line -```bash -python samples/ -``` - -## Uninstall -```bash -pip uninstall aws-langchain -``` - -## Contributing -Create your GitHub branch and make a pull request. -See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. - -Install in editable mode, this will make sure your changes are synced in local python env -```bash -pip install -e ".[dev]" -``` - -## License -This library is licensed under the MIT-0 License. See the LICENSE file. +# AWS Kendra Langchain Extensions +## kendra_retriever_samples +This directory contains samples for a QA chain using an `AmazonKendraRetriever` class. For more info see the samples [README](./kendra_retriever_samples/README.md). +**Note**: If you are using an older version of the repo which contains the `aws_langchain` package, please clone this repo in a new location to avoid any conflicts with the older environment. We are deprecating the `aws_langchain` package, since the kendra retriever class is available in LangChain starting v0.0.213. 
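For readers moving off the deprecated package, a hedged before-and-after sketch of the retriever swap (assumes `langchain>=0.0.213`, AWS credentials, and an existing index; the index ID below is a placeholder):

```python
# Before (deprecated package removed in this patch):
#   from aws_langchain.kendra_index_retriever import KendraIndexRetriever
#   retriever = KendraIndexRetriever(kendraindex=kendra_index_id, awsregion=region)
# After (retriever bundled with LangChain):
from langchain.retrievers import AmazonKendraRetriever

retriever = AmazonKendraRetriever(
    index_id="00000000-0000-0000-0000-000000000000",  # placeholder index ID
    top_k=3,  # number of Kendra results to retrieve
)
docs = retriever.get_relevant_documents("What's SageMaker?")
for doc in docs:
    print(doc.metadata["source"], "-", doc.page_content[:80])
```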
\ No newline at end of file diff --git a/aws_langchain/.gitkeep b/aws_langchain/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/aws_langchain/__init__.py b/aws_langchain/__init__.py deleted file mode 100644 index f254e7d..0000000 --- a/aws_langchain/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Classes to work with AWS Kendra and Bedrock LLMs""" \ No newline at end of file diff --git a/aws_langchain/kendra_index_retriever.py b/aws_langchain/kendra_index_retriever.py deleted file mode 100644 index 7096328..0000000 --- a/aws_langchain/kendra_index_retriever.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Chain for question-answering against a vector database.""" -from __future__ import annotations - -from typing import Any, Dict, List, Optional - -from langchain.schema import BaseRetriever, Document - -from aws_langchain.kendra_results import kendra_query, kendra_client -import boto3 - -class KendraIndexRetriever(BaseRetriever): - """Retriever to retrieve documents from Amazon Kendra index. - - Example: - .. code-block:: python - - kendraIndexRetriever = KendraIndexRetriever() - - """ - - kendraindex: str - """Kendra index id""" - awsregion: str - """AWS region of the Kendra index""" - k: int - """Number of documents to query for.""" - return_source_documents: bool - """Whether source documents to be returned """ - kclient: Any - """ boto3 client for Kendra. """ - - def __init__(self, kendraindex, awsregion, k=3, return_source_documents=False): - self.kendraindex = kendraindex - self.awsregion = awsregion - self.k = k - self.return_source_documents = return_source_documents - self.kclient = kendra_client(self.kendraindex, self.awsregion) - - def get_relevant_documents(self, query: str) -> List[Document]: - """Run search on Kendra index and get top k documents - - docs = get_relevant_documents('This is my query') - """ - docs = kendra_query(self.kclient, query, self.k, self.kendraindex) - return docs - - async def aget_relevant_documents(self, query: str) -> List[Document]: - return await super().aget_relevant_documents(query) diff --git a/aws_langchain/kendra_results.py b/aws_langchain/kendra_results.py deleted file mode 100644 index baa5e51..0000000 --- a/aws_langchain/kendra_results.py +++ /dev/null @@ -1,33 +0,0 @@ -from langchain.docstore.document import Document -import boto3 -import re - -def clean_result(res_text): - res = re.sub("\s+", " ", res_text).replace("...","") - return res - -def get_top_n_results(resp, count): - r = resp["ResultItems"][count] - doc_title = r["DocumentTitle"]["Text"] - doc_uri = r["DocumentURI"] - r_type = r["Type"] - if (r["AdditionalAttributes"] and r["AdditionalAttributes"][0]["Key"] == "AnswerText"): - res_text = r["AdditionalAttributes"][0]["Value"]["TextWithHighlightsValue"]["Text"] - else: - res_text = r["DocumentExcerpt"]["Text"] - doc_excerpt = clean_result(res_text) - combined_text = "Document Title: " + doc_title + "\nDocument Excerpt: \n" + doc_excerpt + "\n" - return {"page_content":combined_text, "metadata":{"source":doc_uri, "title": doc_title, "excerpt": doc_excerpt, "type": r_type}} - -def kendra_query(kclient, kquery, kcount, kindex_id): - response = kclient.query(IndexId=kindex_id, QueryText=kquery.strip()) - if len(response["ResultItems"]) > kcount: - r_count = kcount - else: - r_count = len(response["ResultItems"]) - docs = [get_top_n_results(response, i) for i in range(0, r_count)] - return [Document(page_content = d["page_content"], metadata = d["metadata"]) for d in docs] - -def kendra_client(kindex_id, kregion): - kclient = 
boto3.client('kendra', region_name=kregion) - return kclient diff --git a/kendra_retriever_samples/README.md b/kendra_retriever_samples/README.md new file mode 100644 index 0000000..d12542e --- /dev/null +++ b/kendra_retriever_samples/README.md @@ -0,0 +1,68 @@ +# AWS Langchain +This repo provides a set of samples to work with [Langchain](https://github.com/hwchase17/langchain/tree/master) and Amazon Kendra. It currently has samples for working with a [Kendra retriever class](https://python.langchain.com/docs/modules/data_connection/retrievers/integrations/amazon_kendra_retriever) to execute a QA chain for SageMaker, Open AI and Anthropic providers. + +## Installing + +Clone the repository +```bash +git clone https://github.com/aws-samples/amazon-kendra-langchain-extensions.git +``` + +Move to the repo dir +```bash +cd amazon-kendra-langchain-extensions +``` + +Move to the samples dir +```bash +cd kendra_retriever_samples +``` + +Install the dependencies + +If you are using pip +```bash +pip install -r requirements.txt +``` + +If you are using Conda +```bash +conda env create -f environment.yml +``` + +## Running samples +Ensure that the environment variables are set for the aws region, kendra index id and the provider/model used by the sample. +For example, for running the `kendra_chat_flan_xl.py` sample, these environment variables must be set: AWS_REGION, KENDRA_INDEX_ID +and FLAN_XL_ENDPOINT. + +You can use commands as below to set the environment variables. +```bash +export AWS_REGION="" +export KENDRA_INDEX_ID="" +export FLAN_XL_ENDPOINT="" +export FLAN_XXL_ENDPOINT="" +export OPENAI_API_KEY="" +export ANTHROPIC_API_KEY="" +``` + +### Running samples from the streamlit app +The samples directory is bundled with an `app.py` file that can be run as a web app using streamlit. + +```bash +streamlit run app.py anthropic +``` + +The above command will run the `kendra_chat_anthropic` as the LLM chain. In order to run a different chain, pass a different provider, for example for running the `open_ai` chain run this command `streamlit run app.py openai`. + +### Running samples from the command line +```bash +python +``` + +## Contributing +Create your fork and submit your changes via a pull request. +See [CONTRIBUTING](CONTRIBUTING.md#security-issue-notifications) for more information. + +## License +This library is licensed under the MIT-0 License. See the LICENSE file. 
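Tying the README together, a hedged sketch of a complete run; the module and its `build_chain`/`run_chain` functions come from this directory, while the region, index ID, and endpoint values are placeholders that must point at real resources:

```python
import os

# Placeholders: replace with a real region, Kendra index, and SageMaker endpoint
os.environ["AWS_REGION"] = "us-east-1"
os.environ["KENDRA_INDEX_ID"] = "<kendra-index-id>"
os.environ["FLAN_XL_ENDPOINT"] = "<sagemaker-endpoint-name>"

import kendra_chat_flan_xl as flanxl  # every provider module exposes the same API

chain = flanxl.build_chain()
chat_history = []
result = flanxl.run_chain(chain, "What's SageMaker?", chat_history)
chat_history.append(("What's SageMaker?", result["answer"]))
print(result["answer"])
for doc in result.get("source_documents", []):
    print(doc.metadata["source"])
```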
+ diff --git a/samples/__init__.py b/kendra_retriever_samples/__init__.py similarity index 100% rename from samples/__init__.py rename to kendra_retriever_samples/__init__.py diff --git a/samples/app.py b/kendra_retriever_samples/app.py similarity index 100% rename from samples/app.py rename to kendra_retriever_samples/app.py diff --git a/kendra_retriever_samples/environment.yml b/kendra_retriever_samples/environment.yml new file mode 100644 index 0000000..72591c7 --- /dev/null +++ b/kendra_retriever_samples/environment.yml @@ -0,0 +1,12 @@ +name: kendra-retriever-samples +channels: + - https://conda.anaconda.org/conda-forge +dependencies: + - python=3.10 + - pip + - pip: + - langchain>=0.0.213 + - boto3>=1.26.159 + - openai + - anthropic + - streamlit diff --git a/samples/images/ai-icon.png b/kendra_retriever_samples/images/ai-icon.png similarity index 100% rename from samples/images/ai-icon.png rename to kendra_retriever_samples/images/ai-icon.png diff --git a/samples/images/user-icon.png b/kendra_retriever_samples/images/user-icon.png similarity index 100% rename from samples/images/user-icon.png rename to kendra_retriever_samples/images/user-icon.png diff --git a/samples/kendra-docs-index.yaml b/kendra_retriever_samples/kendra-docs-index.yaml similarity index 100% rename from samples/kendra-docs-index.yaml rename to kendra_retriever_samples/kendra-docs-index.yaml diff --git a/samples/kendra_chat_anthropic.py b/kendra_retriever_samples/kendra_chat_anthropic.py similarity index 68% rename from samples/kendra_chat_anthropic.py rename to kendra_retriever_samples/kendra_chat_anthropic.py index 73f19d9..856a8a6 100644 --- a/samples/kendra_chat_anthropic.py +++ b/kendra_retriever_samples/kendra_chat_anthropic.py @@ -1,20 +1,20 @@ -from aws_langchain.kendra_index_retriever import KendraIndexRetriever +from langchain.retrievers import AmazonKendraRetriever from langchain.chains import ConversationalRetrievalChain from langchain.prompts import PromptTemplate -from langchain.llms import Anthropic +from langchain.chat_models import ChatAnthropic as Anthropic import sys import os class bcolors: - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKCYAN = '\033[96m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' MAX_HISTORY_LENGTH = 5 @@ -25,9 +25,7 @@ def build_chain(): llm = Anthropic(temperature=0, anthropic_api_key=ANTHROPIC_API_KEY, max_tokens_to_sample = 512) - retriever = KendraIndexRetriever(kendraindex=kendra_index_id, - awsregion=region, - return_source_documents=True) + retriever = AmazonKendraRetriever(index_id=kendra_index_id) prompt_template = """ @@ -42,22 +40,36 @@ def build_chain(): {context} - Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. + Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. 
+ + Assistant:""" -Assistant: - """ PROMPT = PromptTemplate( template=prompt_template, input_variables=["context", "question"] ) - qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, qa_prompt=PROMPT, return_source_documents=True) - return qa + condense_qa_template = """ + Given the following conversation and a follow up question, rephrase the follow up question + to be a standalone question. + Chat History: + {chat_history} + Follow Up Input: {question} + Standalone question:""" + standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) + + qa = ConversationalRetrievalChain.from_llm( + llm=llm, + retriever=retriever, + condense_question_prompt=standalone_question_prompt, + return_source_documents=True, + combine_docs_chain_kwargs={"prompt":PROMPT}) + return qa def run_chain(chain, prompt: str, history=[]): return chain({"question": prompt, "chat_history": history}) - if __name__ == "__main__": chat_history = [] qa = build_chain() diff --git a/samples/kendra_chat_flan_xl.py b/kendra_retriever_samples/kendra_chat_flan_xl.py similarity index 76% rename from samples/kendra_chat_flan_xl.py rename to kendra_retriever_samples/kendra_chat_flan_xl.py index 7d82ea5..18d828a 100644 --- a/samples/kendra_chat_flan_xl.py +++ b/kendra_retriever_samples/kendra_chat_flan_xl.py @@ -1,8 +1,8 @@ -from aws_langchain.kendra_index_retriever import KendraIndexRetriever +from langchain.retrievers import AmazonKendraRetriever from langchain.chains import ConversationalRetrievalChain from langchain.prompts import PromptTemplate from langchain import SagemakerEndpoint -from langchain.llms.sagemaker_endpoint import ContentHandlerBase +from langchain.llms.sagemaker_endpoint import LLMContentHandler import sys import json import os @@ -25,7 +25,7 @@ def build_chain(): kendra_index_id = os.environ["KENDRA_INDEX_ID"] endpoint_name = os.environ["FLAN_XL_ENDPOINT"] - class ContentHandler(ContentHandlerBase): + class ContentHandler(LLMContentHandler): content_type = "application/json" accepts = "application/json" @@ -45,11 +45,8 @@ def transform_output(self, output: bytes) -> str: model_kwargs={"temperature":1e-10, "max_length": 500}, content_handler=content_handler ) - - retriever = KendraIndexRetriever(kendraindex=kendra_index_id, - awsregion=region, - return_source_documents=True) + retriever = AmazonKendraRetriever(index_id=kendra_index_id) prompt_template = """ The following is a friendly conversation between a human and an AI. @@ -57,13 +54,29 @@ def transform_output(self, output: bytes) -> str: If the AI does not know the answer to a question, it truthfully says it does not know. {context} - Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. Solution: - """ + Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. + Solution:""" PROMPT = PromptTemplate( template=prompt_template, input_variables=["context", "question"] ) - qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, qa_prompt=PROMPT, return_source_documents=True) + condense_qa_template = """ + Given the following conversation and a follow up question, rephrase the follow up question + to be a standalone question. 
+ + Chat History: + {chat_history} + Follow Up Input: {question} + Standalone question:""" + standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) + + qa = ConversationalRetrievalChain.from_llm( + llm=llm, + retriever=retriever, + condense_question_prompt=standalone_question_prompt, + return_source_documents=True, + combine_docs_chain_kwargs={"prompt":PROMPT}) return qa def run_chain(chain, prompt: str, history=[]): diff --git a/samples/kendra_chat_flan_xxl.py b/kendra_retriever_samples/kendra_chat_flan_xxl.py similarity index 71% rename from samples/kendra_chat_flan_xxl.py rename to kendra_retriever_samples/kendra_chat_flan_xxl.py index 3f4eba6..12cde86 100644 --- a/samples/kendra_chat_flan_xxl.py +++ b/kendra_retriever_samples/kendra_chat_flan_xxl.py @@ -1,22 +1,22 @@ -from aws_langchain.kendra_index_retriever import KendraIndexRetriever +from langchain.retrievers import AmazonKendraRetriever from langchain.chains import ConversationalRetrievalChain from langchain import SagemakerEndpoint -from langchain.llms.sagemaker_endpoint import ContentHandlerBase +from langchain.llms.sagemaker_endpoint import LLMContentHandler from langchain.prompts import PromptTemplate import sys import json import os class bcolors: - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKCYAN = '\033[96m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' MAX_HISTORY_LENGTH = 5 @@ -25,7 +25,7 @@ def build_chain(): kendra_index_id = os.environ["KENDRA_INDEX_ID"] endpoint_name = os.environ["FLAN_XXL_ENDPOINT"] - class ContentHandler(ContentHandlerBase): + class ContentHandler(LLMContentHandler): content_type = "application/json" accepts = "application/json" @@ -46,9 +46,7 @@ def transform_output(self, output: bytes) -> str: content_handler=content_handler ) - retriever = KendraIndexRetriever(kendraindex=kendra_index_id, - awsregion=region, - return_source_documents=True) + retriever = AmazonKendraRetriever(index_id=kendra_index_id) prompt_template = """ The following is a friendly conversation between a human and an AI. @@ -56,19 +54,34 @@ def transform_output(self, output: bytes) -> str: If the AI does not know the answer to a question, it truthfully says it does not know. {context} - Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. Solution: - """ + Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. + Solution:""" PROMPT = PromptTemplate( template=prompt_template, input_variables=["context", "question"] ) - qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, qa_prompt=PROMPT, return_source_documents=True) + condense_qa_template = """ + Given the following conversation and a follow up question, rephrase the follow up question + to be a standalone question. 
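All of the chat samples share this two-step flow: the chain first condenses the new question plus the chat history into a standalone question, then retrieves documents for that question and answers with the combine-docs prompt. An offline sketch of the first step, reusing the condense template verbatim (no model call is made; the formatted prompt is simply printed):

```python
from langchain.prompts import PromptTemplate

condense_qa_template = """
Given the following conversation and a follow up question, rephrase the follow up question
to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
standalone_question_prompt = PromptTemplate.from_template(condense_qa_template)

# This is roughly what the LLM sees in step one of ConversationalRetrievalChain
print(standalone_question_prompt.format(
    chat_history="Human: What is Amazon Kendra?\nAssistant: An AWS search service.",
    question="Which data sources does it support?",
))
```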
+ + Chat History: + {chat_history} + Follow Up Input: {question} + Standalone question:""" + standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) + + qa = ConversationalRetrievalChain.from_llm( + llm=llm, + retriever=retriever, + condense_question_prompt=standalone_question_prompt, + return_source_documents=True, + combine_docs_chain_kwargs={"prompt":PROMPT}) return qa def run_chain(chain, prompt: str, history=[]): return chain({"question": prompt, "chat_history": history}) - if __name__ == "__main__": chat_history = [] qa = build_chain() diff --git a/samples/kendra_chat_open_ai.py b/kendra_retriever_samples/kendra_chat_open_ai.py similarity index 71% rename from samples/kendra_chat_open_ai.py rename to kendra_retriever_samples/kendra_chat_open_ai.py index 1ca2f5a..9615ee9 100644 --- a/samples/kendra_chat_open_ai.py +++ b/kendra_retriever_samples/kendra_chat_open_ai.py @@ -1,4 +1,4 @@ -from aws_langchain.kendra_index_retriever import KendraIndexRetriever +from langchain.retrievers import AmazonKendraRetriever from langchain.chains import ConversationalRetrievalChain from langchain.prompts import PromptTemplate from langchain import OpenAI @@ -13,9 +13,7 @@ def build_chain(): llm = OpenAI(batch_size=5, temperature=0, max_tokens=300) - retriever = KendraIndexRetriever(kendraindex=kendra_index_id, - awsregion=region, - return_source_documents=True) + retriever = AmazonKendraRetriever(index_id=kendra_index_id) prompt_template = """ The following is a friendly conversation between a human and an AI. @@ -23,14 +21,30 @@ def build_chain(): If the AI does not know the answer to a question, it truthfully says it does not know. {context} - Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. Solution: - """ + Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. + Solution:""" PROMPT = PromptTemplate( template=prompt_template, input_variables=["context", "question"] ) - return ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, qa_prompt=PROMPT, return_source_documents=True) - + condense_qa_template = """ + Given the following conversation and a follow up question, rephrase the follow up question + to be a standalone question. + + Chat History: + {chat_history} + Follow Up Input: {question} + Standalone question:""" + standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) + + qa = ConversationalRetrievalChain.from_llm( + llm=llm, + retriever=retriever, + condense_question_prompt=standalone_question_prompt, + return_source_documents=True, + combine_docs_chain_kwargs={"prompt":PROMPT}) + return qa def run_chain(chain, prompt: str, history=[]): return chain({"question": prompt, "chat_history": history}) @@ -69,4 +83,4 @@ class bcolors: print(bcolors.ENDC) print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." 
+ bcolors.ENDC) print(">", end=" ", flush=True) - print(bcolors.OKBLUE + "Bye" + bcolors.ENDC) \ No newline at end of file + print(bcolors.OKBLUE + "Bye" + bcolors.ENDC) diff --git a/kendra_retriever_samples/kendra_retriever_anthropic.py b/kendra_retriever_samples/kendra_retriever_anthropic.py new file mode 100644 index 0000000..2344398 --- /dev/null +++ b/kendra_retriever_samples/kendra_retriever_anthropic.py @@ -0,0 +1,62 @@ +from langchain.retrievers import AmazonKendraRetriever +from langchain.chains import RetrievalQA +from langchain.prompts import PromptTemplate +from langchain.chat_models import ChatAnthropic as Anthropic +import os + + +def build_chain(): + ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"] + region = os.environ["AWS_REGION"] + kendra_index_id = os.environ["KENDRA_INDEX_ID"] + + llm = Anthropic(temperature=0, anthropic_api_key=ANTHROPIC_API_KEY) + + retriever = AmazonKendraRetriever(index_id=kendra_index_id) + + prompt_template = """ + + Human: This is a friendly conversation between a human and an AI. + The AI is talkative and provides specific details from its context but limits it to 240 tokens. + If the AI does not know the answer to a question, it truthfully says it + does not know. + + Assistant: OK, got it, I'll be a talkative truthful AI assistant. + + Human: Here are a few documents in tags: + + {context} + + Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. + + Assistant:""" + + PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] + ) + chain_type_kwargs = {"prompt": PROMPT} + return RetrievalQA.from_chain_type( + llm, + chain_type="stuff", + retriever=retriever, + chain_type_kwargs=chain_type_kwargs, + return_source_documents=True + ) + +def run_chain(chain, prompt: str, history=[]): + result = chain(prompt) + # To make it compatible with chat samples + return { + "answer": result['result'], + "source_documents": result['source_documents'] + } + +if __name__ == "__main__": + chain = build_chain() + result = run_chain(chain, "What's SageMaker?") + print(result['answer']) + if 'source_documents' in result: + print('Sources:') + for d in result['source_documents']: + print(d.metadata['source']) diff --git a/samples/kendra_retriever_flan_xl.py b/kendra_retriever_samples/kendra_retriever_flan_xl.py similarity index 84% rename from samples/kendra_retriever_flan_xl.py rename to kendra_retriever_samples/kendra_retriever_flan_xl.py index bc628d3..15fda0a 100644 --- a/samples/kendra_retriever_flan_xl.py +++ b/kendra_retriever_samples/kendra_retriever_flan_xl.py @@ -1,9 +1,9 @@ -from aws_langchain.kendra_index_retriever import KendraIndexRetriever +from langchain.retrievers import AmazonKendraRetriever from langchain.chains import RetrievalQA from langchain import OpenAI from langchain.prompts import PromptTemplate from langchain import SagemakerEndpoint -from langchain.llms.sagemaker_endpoint import ContentHandlerBase +from langchain.llms.sagemaker_endpoint import LLMContentHandler import json import os @@ -13,7 +13,7 @@ def build_chain(): kendra_index_id = os.environ["KENDRA_INDEX_ID"] endpoint_name = os.environ["FLAN_XL_ENDPOINT"] - class ContentHandler(ContentHandlerBase): + class ContentHandler(LLMContentHandler): content_type = "application/json" accepts = "application/json" @@ -34,9 +34,7 @@ def transform_output(self, output: bytes) -> str: content_handler=content_handler ) - retriever = KendraIndexRetriever(kendraindex=kendra_index_id, - 
awsregion=region, - return_source_documents=True) + retriever = AmazonKendraRetriever(index_id=kendra_index_id) prompt_template = """ The following is a friendly conversation between a human and an AI. @@ -44,8 +42,9 @@ def transform_output(self, output: bytes) -> str: If the AI does not know the answer to a question, it truthfully says it does not know. {context} - Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. Solution: - """ + Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. + Solution:""" PROMPT = PromptTemplate( template=prompt_template, input_variables=["context", "question"] ) diff --git a/samples/kendra_retriever_flan_xxl.py b/kendra_retriever_samples/kendra_retriever_flan_xxl.py similarity index 84% rename from samples/kendra_retriever_flan_xxl.py rename to kendra_retriever_samples/kendra_retriever_flan_xxl.py index 892d4e2..8d61f8b 100644 --- a/samples/kendra_retriever_flan_xxl.py +++ b/kendra_retriever_samples/kendra_retriever_flan_xxl.py @@ -1,9 +1,9 @@ -from aws_langchain.kendra_index_retriever import KendraIndexRetriever +from langchain.retrievers import AmazonKendraRetriever from langchain.chains import RetrievalQA from langchain import OpenAI from langchain.prompts import PromptTemplate from langchain import SagemakerEndpoint -from langchain.llms.sagemaker_endpoint import ContentHandlerBase +from langchain.llms.sagemaker_endpoint import LLMContentHandler import json import os @@ -13,7 +13,7 @@ def build_chain(): kendra_index_id = os.environ["KENDRA_INDEX_ID"] endpoint_name = os.environ["FLAN_XXL_ENDPOINT"] - class ContentHandler(ContentHandlerBase): + class ContentHandler(LLMContentHandler): content_type = "application/json" accepts = "application/json" @@ -34,9 +34,7 @@ def transform_output(self, output: bytes) -> str: content_handler=content_handler ) - retriever = KendraIndexRetriever(kendraindex=kendra_index_id, - awsregion=region, - return_source_documents=True) + retriever = AmazonKendraRetriever(index_id=kendra_index_id) prompt_template = """ The following is a friendly conversation between a human and an AI. @@ -44,8 +42,9 @@ def transform_output(self, output: bytes) -> str: If the AI does not know the answer to a question, it truthfully says it does not know. {context} - Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. Solution: - """ + Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. 
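For the non-chat retriever samples, RetrievalQA with `chain_type="stuff"` simply concatenates every retrieved Kendra excerpt into the `{context}` slot of the prompt. A small offline sketch with made-up excerpts, assuming the excerpt formatting shown (real excerpts come from the retriever):

```python
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    template="{context}\nInstruction: Based on the above documents, answer {question}\nSolution:",
    input_variables=["context", "question"],
)

# Made-up excerpts standing in for AmazonKendraRetriever results
excerpts = [
    "Document Title: SageMaker FAQ\nDocument Excerpt: Amazon SageMaker is a fully managed ML service.",
    "Document Title: Kendra FAQ\nDocument Excerpt: Amazon Kendra is an intelligent search service.",
]
print(prompt.format(context="\n\n".join(excerpts), question="What's SageMaker?"))
```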
+ Solution:""" PROMPT = PromptTemplate( template=prompt_template, input_variables=["context", "question"] ) diff --git a/kendra_retriever_samples/kendra_retriever_open_ai.py b/kendra_retriever_samples/kendra_retriever_open_ai.py new file mode 100644 index 0000000..d370684 --- /dev/null +++ b/kendra_retriever_samples/kendra_retriever_open_ai.py @@ -0,0 +1,53 @@ +from langchain.retrievers import AmazonKendraRetriever +from langchain.chains import RetrievalQA +from langchain import OpenAI +from langchain.prompts import PromptTemplate +import os + + +def build_chain(): + region = os.environ["AWS_REGION"] + kendra_index_id = os.environ["KENDRA_INDEX_ID"] + + llm = OpenAI(batch_size=5, temperature=0, max_tokens=300) + + retriever = AmazonKendraRetriever(index_id=kendra_index_id) + + prompt_template = """ + The following is a friendly conversation between a human and an AI. + The AI is talkative and provides lots of specific details from its context. + If the AI does not know the answer to a question, it truthfully says it + does not know. + {context} + Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" + if not present in the document. + Solution:""" + PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] + ) + chain_type_kwargs = {"prompt": PROMPT} + + return RetrievalQA.from_chain_type( + llm, + chain_type="stuff", + retriever=retriever, + chain_type_kwargs=chain_type_kwargs, + return_source_documents=True + ) + +def run_chain(chain, prompt: str, history=[]): + result = chain(prompt) + # To make it compatible with chat samples + return { + "answer": result['result'], + "source_documents": result['source_documents'] + } + +if __name__ == "__main__": + chain = build_chain() + result = run_chain(chain, "What's SageMaker?") + print(result['answer']) + if 'source_documents' in result: + print('Sources:') + for d in result['source_documents']: + print(d.metadata['source']) diff --git a/kendra_retriever_samples/requirements.txt b/kendra_retriever_samples/requirements.txt new file mode 100644 index 0000000..8e3f546 --- /dev/null +++ b/kendra_retriever_samples/requirements.txt @@ -0,0 +1,5 @@ +langchain>=0.0.213 +boto3>=1.26.159 +openai +anthropic +streamlit diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index 957de6b..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,41 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" - -[project] -name = "aws_langchain" -version = "0.0.1" -description = "Langchain utility classes to aid working with AWS services" -readme = "README.md" -requires-python = ">=3.8" -classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", -] - -dependencies = [ - "langchain==0.0.137", - "boto3", - "html2text" -] - -[project.optional-dependencies] -samples = [ - "openai", - "anthropic", - "streamlit" -] - -dev = [ - "openai", - "anthropic", - "streamlit" -] - -[project.urls] -"Homepage" = "https://gitlab.aws.dev/langchain/aws-langchain" -"Bug Tracker" = "https://gitlab.aws.dev/langchain/aws-langchain/issues" - -[tool.setuptools] -packages = ["aws_langchain"] diff --git a/samples/kendra_retriever_anthropic.py b/samples/kendra_retriever_anthropic.py deleted file mode 100644 index 4a9d8c9..0000000 --- a/samples/kendra_retriever_anthropic.py +++ /dev/null @@ -1,54 +0,0 @@ -from aws_langchain.kendra_index_retriever import KendraIndexRetriever -from 
langchain.chains import RetrievalQA -from langchain.prompts import PromptTemplate -from langchain.llms import Anthropic -import os - - -def build_chain(): - ANTHROPIC_API_KEY = os.environ["ANTHROPIC_API_KEY"] - region = os.environ["AWS_REGION"] - kendra_index_id = os.environ["KENDRA_INDEX_ID"] - - llm = Anthropic(temperature=0, anthropic_api_key=ANTHROPIC_API_KEY) - - retriever = KendraIndexRetriever(kendraindex=kendra_index_id, - awsregion=region, - return_source_documents=True) - - prompt_template = """ - The following is a friendly conversation between a human and an AI. - The AI is talkative and provides lots of specific details from its context. - If the AI does not know the answer to a question, it truthfully says it - does not know. - {context} - Question: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. Answer: - """ - PROMPT = PromptTemplate( - template=prompt_template, input_variables=["context", "question"] - ) - chain_type_kwargs = {"prompt": PROMPT} - return RetrievalQA.from_chain_type( - llm, - chain_type="stuff", - retriever=retriever, - chain_type_kwargs=chain_type_kwargs, - return_source_documents=True - ) - -def run_chain(chain, prompt: str, history=[]): - result = chain(prompt) - # To make it compatible with chat samples - return { - "answer": result['result'], - "source_documents": result['source_documents'] - } - -if __name__ == "__main__": - chain = build_chain() - result = run_chain(chain, "What's SageMaker?") - print(result['answer']) - if 'source_documents' in result: - print('Sources:') - for d in result['source_documents']: - print(d.metadata['source']) diff --git a/samples/kendra_retriever_open_ai.py b/samples/kendra_retriever_open_ai.py deleted file mode 100644 index 67daa94..0000000 --- a/samples/kendra_retriever_open_ai.py +++ /dev/null @@ -1,54 +0,0 @@ -from aws_langchain.kendra_index_retriever import KendraIndexRetriever -from langchain.chains import RetrievalQA -from langchain import OpenAI -from langchain.prompts import PromptTemplate -import os - - -def build_chain(): - region = os.environ["AWS_REGION"] - kendra_index_id = os.environ["KENDRA_INDEX_ID"] - - llm = OpenAI(batch_size=5, temperature=0, max_tokens=300) - - retriever = KendraIndexRetriever(kendraindex=kendra_index_id, - awsregion=region, - return_source_documents=True) - - prompt_template = """ - The following is a friendly conversation between a human and an AI. - The AI is talkative and provides lots of specific details from its context. - If the AI does not know the answer to a question, it truthfully says it - does not know. - {context} - Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. 
Solution: -     """ -    PROMPT = PromptTemplate( -        template=prompt_template, input_variables=["context", "question"] -    ) -    chain_type_kwargs = {"prompt": PROMPT} - -    return RetrievalQA.from_chain_type( -        llm, -        chain_type="stuff", -        retriever=retriever, -        chain_type_kwargs=chain_type_kwargs, -        return_source_documents=True -    ) - -def run_chain(chain, prompt: str, history=[]): -    result = chain(prompt) -    # To make it compatible with chat samples -    return { -        "answer": result['result'], -        "source_documents": result['source_documents'] -    } - -if __name__ == "__main__": -    chain = build_chain() -    result = run_chain(chain, "What's SageMaker?") -    print(result['answer']) -    if 'source_documents' in result: -        print('Sources:') -        for d in result['source_documents']: -            print(d.metadata['source']) From 4216b910532cc097578dd83d3fc225be7eb900e3 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Thu, 29 Jun 2023 13:34:28 -0700 Subject: [PATCH 15/22] Upgraded langchain to >=0.0.219 --- kendra_retriever_samples/environment.yml | 2 +- kendra_retriever_samples/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kendra_retriever_samples/environment.yml b/kendra_retriever_samples/environment.yml index 72591c7..51b19c4 100644 --- a/kendra_retriever_samples/environment.yml +++ b/kendra_retriever_samples/environment.yml @@ -5,7 +5,7 @@ dependencies: - python=3.10 - pip - pip: - - langchain>=0.0.213 + - langchain>=0.0.219 - boto3>=1.26.159 - openai - anthropic diff --git a/kendra_retriever_samples/requirements.txt b/kendra_retriever_samples/requirements.txt index 8e3f546..0279e6b 100644 --- a/kendra_retriever_samples/requirements.txt +++ b/kendra_retriever_samples/requirements.txt @@ -1,4 +1,4 @@ -langchain>=0.0.213 +langchain>=0.0.219 boto3>=1.26.159 openai anthropic From 1c03b0902fba71b8c78b895ca7dc0f47baa2717a Mon Sep 17 00:00:00 2001 From: Abhinav Jawadekar Date: Wed, 12 Jul 2023 18:27:08 -0700 Subject: [PATCH 16/22] Pin langchain version to 0.0.219 to avoid breaking changes --- kendra_retriever_samples/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kendra_retriever_samples/requirements.txt b/kendra_retriever_samples/requirements.txt index 0279e6b..deffe81 100644 --- a/kendra_retriever_samples/requirements.txt +++ b/kendra_retriever_samples/requirements.txt @@ -1,4 +1,4 @@ -langchain>=0.0.219 +langchain==0.0.219 boto3>=1.26.159 openai anthropic From 43fe0e0c5d0de9166037d1afa0d8373535ec301f Mon Sep 17 00:00:00 2001 From: Abhinav Jawadekar Date: Mon, 31 Jul 2023 17:09:35 -0700 Subject: [PATCH 17/22] Now users can choose between Kendra Enterprise Edition and Kendra Developer Edition index --- kendra_retriever_samples/kendra-docs-index.yaml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kendra_retriever_samples/kendra-docs-index.yaml b/kendra_retriever_samples/kendra-docs-index.yaml index fd628b3..0511661 100644 --- a/kendra_retriever_samples/kendra-docs-index.yaml +++ b/kendra_retriever_samples/kendra-docs-index.yaml @@ -65,7 +65,7 @@ Resources: - '' - - !Ref 'AWS::StackName' - '-Index' - Edition: 'DEVELOPER_EDITION' + Edition: !Ref KendraEdition RoleArn: !GetAtt KendraIndexRole.Arn ##Create the Role needed to attach the Webcrawler Data Source @@ -203,6 +203,15 @@ Resources: Properties: ServiceToken: !GetAtt DataSourceSyncLambda.Arn +Parameters: + KendraEdition: + Type: String + Default: 'ENTERPRISE_EDITION' + AllowedValues: + - 'ENTERPRISE_EDITION' + - 'DEVELOPER_EDITION' + Description: 'ENTERPRISE_EDITION (default) is
recommended for production deployments, and offers high availability and scale up capabilities. DEVELOPER_EDITION (Free Tier eligible) is suitable for temporary, non-production, experimental workloads. NOTE: indexes cannot currently be migrated from one type to another.' + Outputs: KendraIndexID: Value: !GetAtt DocsKendraIndex.Id From 686c76451cf17b726b4c0c33a24bc3685d54e018 Mon Sep 17 00:00:00 2001 From: Mithil Shah Date: Mon, 14 Aug 2023 14:36:30 +1000 Subject: [PATCH 18/22] flan xxl --- kendra_retriever_samples/app.py | 2 +- kendra_retriever_samples/kendra_chat_flan_xxl.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kendra_retriever_samples/app.py b/kendra_retriever_samples/app.py index 047f02b..c20ab6e 100644 --- a/kendra_retriever_samples/app.py +++ b/kendra_retriever_samples/app.py @@ -17,7 +17,7 @@ 'anthropic': 'Anthropic', 'flanxl': 'Flan XL', 'flanxxl': 'Flan XXL', - 'falcon40b': 'Falcon 40B' + 'falcon40b': 'Falcon 40B', 'llama2' : 'Llama 2' } diff --git a/kendra_retriever_samples/kendra_chat_flan_xxl.py b/kendra_retriever_samples/kendra_chat_flan_xxl.py index 6b14d7c..d4aec42 100644 --- a/kendra_retriever_samples/kendra_chat_flan_xxl.py +++ b/kendra_retriever_samples/kendra_chat_flan_xxl.py @@ -30,13 +30,13 @@ class ContentHandler(LLMContentHandler): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) + input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) print("input_str", input_str) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: response_json = json.loads(output.read().decode("utf-8")) - return response_json["generated_texts"][0] + return response_json[0]["generated_text"] content_handler = ContentHandler() From edcab2dba61a272b8e74f8a6d56219cad49c2c0a Mon Sep 17 00:00:00 2001 From: Mithil Shah Date: Mon, 14 Aug 2023 14:46:42 +1000 Subject: [PATCH 19/22] flan xxl --- kendra_retriever_samples/kendra_chat_flan_xxl.py | 5 ++--- kendra_retriever_samples/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/kendra_retriever_samples/kendra_chat_flan_xxl.py b/kendra_retriever_samples/kendra_chat_flan_xxl.py index d4aec42..12cde86 100644 --- a/kendra_retriever_samples/kendra_chat_flan_xxl.py +++ b/kendra_retriever_samples/kendra_chat_flan_xxl.py @@ -30,13 +30,12 @@ class ContentHandler(LLMContentHandler): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) - print("input_str", input_str) + input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: response_json = json.loads(output.read().decode("utf-8")) - return response_json[0]["generated_text"] + return response_json["generated_texts"][0] content_handler = ContentHandler() diff --git a/kendra_retriever_samples/requirements.txt b/kendra_retriever_samples/requirements.txt index deffe81..cd34738 100644 --- a/kendra_retriever_samples/requirements.txt +++ b/kendra_retriever_samples/requirements.txt @@ -1,4 +1,4 @@ -langchain==0.0.219 +langchain==0.0.263 boto3>=1.26.159 openai anthropic From 9ef8ccc432bd0e6e08b77e46966bf96e8a50217e Mon Sep 17 00:00:00 2001 From: Mithil Shah Date: Tue, 15 Aug 2023 09:46:28 +1000 Subject: [PATCH 20/22] llama2 --- kendra_retriever_samples/README.md | 4 ++++ 
kendra_retriever_samples/kendra_chat_falcon_40b.py | 1 + kendra_retriever_samples/kendra_chat_llama_2.py | 2 ++ 3 files changed, 7 insertions(+) diff --git a/kendra_retriever_samples/README.md b/kendra_retriever_samples/README.md index d12542e..e95d4ba 100644 --- a/kendra_retriever_samples/README.md +++ b/kendra_retriever_samples/README.md @@ -43,6 +43,7 @@ export FLAN_XL_ENDPOINT="" export FLAN_XXL_ENDPOINT="" export OPENAI_API_KEY="" export ANTHROPIC_API_KEY="" +export FALCON_40B_ENDPOINT="" ``` ### Running samples from the streamlit app @@ -54,6 +55,9 @@ streamlit run app.py anthropic The above command will run the `kendra_chat_anthropic` as the LLM chain. In order to run a different chain, pass a different provider, for example for running the `open_ai` chain run this command `streamlit run app.py openai`. +Here is the list of providers: + 'openai','anthropic','flanxl','flanxxl','falcon40b','llama2' + ### Running samples from the command line ```bash python diff --git a/kendra_retriever_samples/kendra_chat_falcon_40b.py b/kendra_retriever_samples/kendra_chat_falcon_40b.py index d49ac50..38e297d 100644 --- a/kendra_retriever_samples/kendra_chat_falcon_40b.py +++ b/kendra_retriever_samples/kendra_chat_falcon_40b.py @@ -30,6 +30,7 @@ class ContentHandler(LLMContentHandler): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: + prompt = prompt[:1023] input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) print("input_str", input_str) return input_str.encode('utf-8') diff --git a/kendra_retriever_samples/kendra_chat_llama_2.py b/kendra_retriever_samples/kendra_chat_llama_2.py index c802a2a..ea9833e 100644 --- a/kendra_retriever_samples/kendra_chat_llama_2.py +++ b/kendra_retriever_samples/kendra_chat_llama_2.py @@ -37,6 +37,7 @@ def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: ]], **model_kwargs }) + print(input_str) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: @@ -89,6 +90,7 @@ def transform_output(self, output: bytes) -> str: return qa def run_chain(chain, prompt: str, history=[]): + print(prompt) return chain({"question": prompt, "chat_history": history}) if __name__ == "__main__": From 33dda3b7760588dd932b6bb4438d1d43f40fb5a6 Mon Sep 17 00:00:00 2001 From: MithilShah Date: Wed, 16 Aug 2023 14:04:00 +1000 Subject: [PATCH 21/22] Revert "Jupyter notebook (RAG workflow demo)" --- .../genai-kendra-langchain.ipynb | 1142 ----------------- .../kendra_chat_flan_xl_nb.py | 145 --- .../skip_kernel_extension.py | 22 - 3 files changed, 1309 deletions(-) delete mode 100644 kendra_retriever_samples/genai-kendra-langchain.ipynb delete mode 100644 kendra_retriever_samples/kendra_chat_flan_xl_nb.py delete mode 100644 kendra_retriever_samples/skip_kernel_extension.py diff --git a/kendra_retriever_samples/genai-kendra-langchain.ipynb b/kendra_retriever_samples/genai-kendra-langchain.ipynb deleted file mode 100644 index 80e4442..0000000 --- a/kendra_retriever_samples/genai-kendra-langchain.ipynb +++ /dev/null @@ -1,1142 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "76653ab1-e168-45c7-8c45-2208e37f71d0", - "metadata": {}, - "source": [ - "## [GenAI applications on enterprise data with Amazon Kendra, LangChain and LLMs](https://aws.amazon.com/blogs/machine-learning/quickly-build-high-accuracy-generative-ai-applications-on-enterprise-data-using-amazon-kendra-langchain-and-large-language-models/)\n", - "\n", - "In this tutorial, we will demonstrate how to implement
[Retrieval Augmented Generation](https://arxiv.org/abs/2005.11401) (RAG) workflows with [Amazon Kendra](https://aws.amazon.com/kendra/), [πŸ¦œοΈπŸ”— LangChain](https://python.langchain.com/en/latest/index.html) and state-of-the-art [Large Language Models](https://docs.cohere.com/docs/introduction-to-large-language-models) (LLM) to provide a conversational experience backed by data.\n", - "\n", - "> Visit the [Generative AI on AWS](https://aws.amazon.com/generative-ai/) landing page for the latest news on generative AI (GenAI) and learn how AWS is helping reinvent customer experiences and applications" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "bb0f79b1-1124-43f7-a659-a6d1c249fa32", - "metadata": {}, - "source": [ - "### Architecture\n", - "\n", - "The diagram below shows the architecture of a GenAI application with a RAG approach:\n", - "\n", - "\n", - "\n", - "We use the [Amazon Kendra index](https://docs.aws.amazon.com/kendra/latest/dg/hiw-index.html) to hold large quantities of unstructured data from multiple [data sources](https://docs.aws.amazon.com/kendra/latest/dg/hiw-data-source.html), including:\n", - "\n", - "* Wiki pages\n", - "* [MS SharePoint sites](https://docs.aws.amazon.com/kendra/latest/dg/data-source-sharepoint.html)\n", - "* Document repositories like [Amazon S3](https://docs.aws.amazon.com/kendra/latest/dg/data-source-s3.html)\n", - "* ... *and much, much more!*\n", - "\n", - "Each time an user interacts with the GenAI app, the following will happen:\n", - "\n", - "1. The user makes a request to the GenAI app\n", - "2. The app issues a [search query](https://docs.aws.amazon.com/kendra/latest/dg/searching-example.html) to the Amazon Kendra index based on the user request\n", - "3. The index returns search results with excerpts of relevant documents from the ingested data\n", - "4. The app sends the user request along with the data retrieved from the index as context in the LLM prompt\n", - "5. The LLM returns a succint response to the user request based on the retrieved data\n", - "6. 
The response from the LLM is sent back to the user" - ] - }, - { - "cell_type": "markdown", - "id": "7fceeb54-28ff-446e-9e66-2eb8c6d8464f", - "metadata": {}, - "source": [ - "### Prerequisites\n", - "\n", - "> **Note:** Tested with [Amazon SageMaker Studio](https://docs.aws.amazon.com/sagemaker/latest/dg/studio.html) on a `ml.t3.medium` (2 vCPU + 4 GiB) instance with the [Base Python 3.0 [`sagemaker-base-python-310`]](https://docs.aws.amazon.com/sagemaker/latest/dg/notebooks-available-images.html) image" - ] - }, - { - "cell_type": "markdown", - "id": "c22974f6-b724-4f28-be12-51eb8fad2344", - "metadata": {}, - "source": [ - "For this demo, we will need a Python version compatible with [πŸ¦œοΈπŸ”— LangChain](https://pypi.org/project/langchain/) (`>=3.8.1, <4.0`)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "093092c2-be80-4233-ba8e-6e8b6c9bd7d4", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import sys\n", - "!{sys.executable} -V" - ] - }, - { - "cell_type": "markdown", - "id": "28631311-8d7a-4ce4-a453-e00e14cda932", - "metadata": {}, - "source": [ - "**Optional:** we will also need the [AWS CLI](https://aws.amazon.com/cli/) (`v2`) to create the Kendra index\n", - "\n", - "> For more information on how to upgrade the AWS CLI, see [Installing or updating the latest version of the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html)\n", - "\n", - "> When running this notebook through Amazon SageMaker, make sure the [execution role](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) has enough permissions to run the commands" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "488dda35-9873-4b2a-b476-0fa2bcf696e8", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "!aws --version" - ] - }, - { - "cell_type": "markdown", - "id": "bcab366a-8d84-4c85-97b1-878ac574edae", - "metadata": {}, - "source": [ - "and a recent version of the [SageMaker Python SDK](https://sagemaker.readthedocs.io/en/stable/) (`>=2.154.0`), containing the [SageMaker JumpStart SDK](https://github.com/aws/sagemaker-python-sdk/releases/tag/v2.154.0), to deploy the LLM to a SageMaker Endpoint." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5210f2bc-b3c4-4789-954a-8b7e5e3e3bf6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Set pip options\n", - "%env PIP_DISABLE_PIP_VERSION_CHECK True\n", - "%env PIP_ROOT_USER_ACTION ignore\n", - "\n", - "# Install/update SageMaker Python SDK\n", - "!{sys.executable} -m pip install -qU \"sagemaker>=2.154.0\"\n", - "!python -c \"import sagemaker; print(sagemaker.__version__)\"" - ] - }, - { - "cell_type": "markdown", - "id": "ac1bf9d7-4f2f-4591-9208-1cf091daa8cc", - "metadata": {}, - "source": [ - "The variables below can be used to bypass **Optional** steps." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e7b404ed-d4de-4133-aca9-1ae01828db0f", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%load_ext skip_kernel_extension\n", - "\n", - "# Whether to skip the Kendra index deployment\n", - "SKIP_KENDRA_DEPLOYMENT = False\n", - "\n", - "# Stack name for the Kendra index deployment\n", - "KENDRA_STACK_NAME = \"genai-kendra-langchain\"\n", - "\n", - "# Whether to skip the quota increase request\n", - "SKIP_QUOTA_INCREASE = True\n", - "\n", - "# Whether Streamlit should be installed\n", - "SKIP_STREAMLIT_INSTALL = False" - ] - }, - { - "cell_type": "markdown", - "id": "193d8512-cd1c-4f74-81fc-4706aaa3a495", - "metadata": {}, - "source": [ - "### Implement a RAG Workflow" - ] - }, - { - "cell_type": "markdown", - "id": "b193c61d-1d51-40fb-bc22-fbdcd22d0c50", - "metadata": {}, - "source": [ - "The [AWS LangChain](https://github.com/aws-samples/amazon-kendra-langchain-extensions) repository contains a set of utility classes to work with LangChain, which includes a retriever class (`KendraIndexRetriever`) for working with a Kendra index and sample scripts to execute the Q&A chain for SageMaker, Open AI and Anthropic providers." - ] - }, - { - "cell_type": "markdown", - "id": "0e61f630-2685-4c51-9955-f344e1b47cdd", - "metadata": {}, - "source": [ - "**Optional:** deploy the provided AWS CloudFormation template ([`samples/kendra-docs-index.yaml`](https://github.com/aws-samples/amazon-kendra-langchain-extensions/blob/main/samples/kendra-docs-index.yaml)) to create a new Kendra index" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ade29089-ea81-4e04-be14-5e4aad4f030b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%%skip $SKIP_KENDRA_DEPLOYMENT\n", - "!aws cloudformation deploy --stack-name $KENDRA_STACK_NAME --template-file \"kendra-docs-index.yaml\" --capabilities CAPABILITY_NAMED_IAM" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ec4abc48-f25b-4805-bd5e-904ef231f358", - "metadata": {}, - "outputs": [], - "source": [ - "%%skip $SKIP_KENDRA_DEPLOYMENT\n", - "!aws cloudformation describe-stacks --stack-name $KENDRA_STACK_NAME --query 'Stacks[0].Outputs[?OutputKey==`KendraIndexID`].OutputValue' --output text" - ] - }, - { - "cell_type": "markdown", - "id": "59b7ee8a-b1d1-4747-a90a-5b2b3c1d8dbe", - "metadata": {}, - "source": [ - "**Optional:** consider requesting a quota increase via [AWS Service Quotas](https://docs.aws.amazon.com/general/latest/gr/aws_service_limits.html) on the size of the document excerpts returned by Amazon Kendra for a better experience" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "230aaa4e-2875-41c7-a56f-fc1db5b3e9ac", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%%skip $SKIP_QUOTA_INCREASE\n", - "# Request a quota increase for the maximum number of characters displayed in the Document Excerpt of a Document type result in the Query API\n", - "# https://docs.aws.amazon.com/kendra/latest/APIReference/API_Query.html\n", - "!aws service-quotas request-service-quota-increase --service-code kendra --quota-code \"L-196E775D\" --desired-value 1000" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "e570923b-efcc-4e3f-ab88-3afab8f17b79", - "metadata": { - "tags": [] - }, - "source": [ - "**Optional:** Install Streamlit\n", - "\n", - "> [Streamlit](https://streamlit.io/) is an open source framework for building and sharing data apps. 
\n", - ">\n", - "> πŸ’‘ For a quick demo, try out the [Knowledge base > Tutorials](https://docs.streamlit.io/knowledge-base/tutorials)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "35987475-d40a-4720-8e32-096bc8286047", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%%skip $SKIP_STREAMLIT_INSTALL\n", - "\n", - "# Install streamlit\n", - "# https://docs.streamlit.io/library/get-started/installation\n", - "!{sys.executable} -m pip install -qU $(grep streamlit requirements.txt)\n", - "\n", - "# Debug installation\n", - "# https://docs.streamlit.io/knowledge-base/using-streamlit/sanity-checks\n", - "!streamlit version" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "3f882417-9345-4483-a7fd-e945f319b152", - "metadata": { - "tags": [] - }, - "source": [ - "Install πŸ¦œοΈπŸ”— LangChain\n", - "\n", - "> [LangChain](https://github.com/hwchase17/langchain) is an open-source framework for building *agentic* and *data-aware* applications powered by language models.\n", - ">\n", - "> πŸ’‘ For a quick intro, check out [Getting Started with LangChain: A Beginner’s Guide to Building LLM-Powered Applications](https://towardsdatascience.com/getting-started-with-langchain-a-beginners-guide-to-building-llm-powered-applications-95fc8898732c)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "62a3aea9-5632-442e-8a41-441dd3fa7b7c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Install LangChain\n", - "# https://python.langchain.com/en/latest/getting_started/getting_started.html\n", - "!{sys.executable} -m pip install -qU $(grep langchain requirements.txt)\n", - "\n", - "# Debug installation\n", - "!python -c \"import langchain; print(langchain.__version__)\"" - ] - }, - { - "cell_type": "markdown", - "id": "07479ad8-f3c9-4510-8f86-7567bd6f6251", - "metadata": {}, - "source": [ - "Now we need an LLM to handle user queries. \n", - "\n", - "Models like [Flan-T5-XL](https://huggingface.co/google/flan-t5-xl) and [Flan-T5-XXL](https://huggingface.co/google/flan-t5-xxl), which are available on [Hugging Face Transformers](https://huggingface.co/docs/transformers/model_doc/flan-t5), can be deployed via [Amazon SageMaker JumpStart](https://aws.amazon.com/sagemaker/jumpstart/) in a matter of minutes with just a few lines of code.\n", - "\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1064441d-6db4-43a5-a518-a187a08740c6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from sagemaker.jumpstart.model import JumpStartModel\n", - "\n", - "# Select model\n", - "# https://aws.amazon.com/sagemaker/jumpstart/getting-started\n", - "model_id = str(input(\"Model ID:\") or \"huggingface-text2text-flan-t5-xl\")\n", - "\n", - "# Deploy model\n", - "model = JumpStartModel(model_id=model_id)\n", - "predictor = model.deploy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9492a301-7299-46ca-a27f-08cf0bba3e59", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Test model\n", - "predictor.predict(\"Hey there! How are you?\")" - ] - }, - { - "cell_type": "markdown", - "id": "73ef0c04-10e9-41d9-8a20-993f02f91901", - "metadata": {}, - "source": [ - "**Optional:** if you want to work with [Anthropic's `Claude-V1`](https://www.anthropic.com/index/introducing-claude) or [OpenAI's `da-vinci-003`](da-vinci-003), get the corresponding API key(s) and run the cell below." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "18d958fa-4ed9-4e1d-a1cf-c8ba04b9b830", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import os\n", - "from getpass import getpass\n", - "\n", - "\"\"\"\n", - "OpenAI\n", - "https://python.langchain.com/en/latest/modules/models/llms/integrations/openai.html\n", - "\"\"\"\n", - "\n", - "# Get an API key from\n", - "# https://platform.openai.com/account/api-keys\n", - "OPENAI_API_KEY = getpass(\"OPENAI_API_KEY:\")\n", - "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY\n", - "\n", - "\"\"\"\n", - "Anthropic\n", - "https://python.langchain.com/en/latest/modules/models/chat/integrations/anthropic.html\n", - "\"\"\"\n", - "\n", - "# Get an API key from\n", - "# https://www.anthropic.com/product\n", - "ANTHROPIC_API_KEY = getpass(\"ANTHROPIC_API_KEY:\")\n", - "os.environ[\"ANTHROPIC_API_KEY\"] = ANTHROPIC_API_KEY" - ] - }, - { - "cell_type": "markdown", - "id": "0631867b-cfba-457e-a5bd-f09ba60f969f", - "metadata": {}, - "source": [ - "Install the `KendraIndexRetriever` interface and sample applications" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a46de18-8599-4300-a9d6-b88f19c316c3", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Install classes\n", - "!{sys.executable} -m pip install -qU .." - ] - }, - { - "cell_type": "markdown", - "id": "db5825a3-9fe0-4ca6-a1b3-bc4ed39305a7", - "metadata": {}, - "source": [ - "Before running the sample application, we need to set up the environment variables with the Amazon Kendra index details (`KENDRA_INDEX_ID`) and the SageMaker Endpoints for the `FLAN-T5-*` models (`FLAN_*_ENDPOINT`)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3fc6cc1c-a0ce-417c-a92f-bd1344132025", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "import re\n", - "\n", - "# Set Kendra index ID\n", - "os.environ['KENDRA_INDEX_ID'] = input('KENDRA_INDEX_ID:')\n", - "\n", - "# Set endpoint name\n", - "# https://github.com/aws/amazon-sagemaker-examples/blob/main/introduction_to_amazon_algorithms/jumpstart-foundation-models/text2text-generation-flan-t5.ipynb\n", - "if re.search(\"flan-t5-xl\", model_id):\n", - " os.environ['FLAN_XL_ENDPOINT'] = predictor.endpoint_name\n", - "elif re.search(\"flan-t5-xxl\", model_id):\n", - " os.environ['FLAN_XXL_ENDPOINT'] = predictor.endpoint_name\n", - "elif \"OPENAI_API_KEY\" in os.environ or \"ANTHROPIC_API_KEY\" in os.environ:\n", - " print(\"Using external API key\")\n", - "else:\n", - " print(\"⚠️ The SageMaker Endpoint environment variable is not set!\")" - ] - }, - { - "cell_type": "markdown", - "id": "64a8fdc8-dd0c-4a9d-bb5c-b812221313e5", - "metadata": { - "tags": [] - }, - "source": [ - "Finally, let's start the application 😊" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b30fb0e2-f2af-4f5a-b8a0-d0d706b5d984", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Python\n", - "%run kendra_chat_flan_xl_nb.py" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fb063d0f-515e-4f0d-97b2-95c65ca1ea01", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Streamlit\n", - "!streamlit run app.py flanxl" - ] - }, - { - "cell_type": "markdown", - "id": "8f3000db-84b3-46fe-b6bc-a354ed8dcd18", - "metadata": {}, - "source": [ - "> **Note:** As of May 2023, Amazon SageMaker Studio doesn't allow apps to run through Jupyter Server Proxy on a Kernel Gateway. 
The best option is to use the [SageMaker SSH Helper](https://github.com/aws-samples/sagemaker-ssh-helper) library to do port forwarding to `server.port` (defaults to `8501`) cf. [Local IDE integration with SageMaker Studio over SSH for PyCharm / VSCode](https://github.com/aws-samples/sagemaker-ssh-helper#local-ide-integration-with-sagemaker-studio-over-ssh-for-pycharm--vscode) for more information." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "82a59dbc-55ba-4a15-b0f1-3e2d764d7fc5", - "metadata": {}, - "source": [ - "" - ] - }, - { - "cell_type": "markdown", - "id": "5299dc29-fa23-407e-aba0-aea056979246", - "metadata": {}, - "source": [ - "### Cleanup\n", - "\n", - "Don't forget to delete the SageMaker Endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8e2233be-e5e9-4c63-a694-605bf08bf46c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "predictor.delete_endpoint()" - ] - }, - { - "cell_type": "markdown", - "id": "e47d328a-e236-4b6d-8462-59a38316f347", - "metadata": {}, - "source": [ - "and the Kendra index" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f842c617-b74e-46b1-b7c7-79f2397657da", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "%%skip $SKIP_KENDRA_DEPLOYMENT\n", - "!aws cloudformation delete-stack --stack-name $KENDRA_STACK_NAME" - ] - }, - { - "cell_type": "markdown", - "id": "d4feb917-1844-42bf-ba63-1f090173b389", - "metadata": {}, - "source": [ - "### References πŸ“š\n", - "\n", - "* AWS ML Blog: [Quickly build high-accuracy Generative AI applications on enterprise data using Amazon Kendra, LangChain, and large language models](https://aws.amazon.com/blogs/machine-learning/quickly-build-high-accuracy-generative-ai-applications-on-enterprise-data-using-amazon-kendra-langchain-and-large-language-models/)\n", - "* AWS ML Blog: [Question answering using Retrieval Augmented Generation with foundation models in Amazon SageMaker JumpStart](https://aws.amazon.com/blogs/machine-learning/question-answering-using-retrieval-augmented-generation-with-foundation-models-in-amazon-sagemaker-jumpstart/)\n", - "* AWS ML Blog: [Dive deep into Amazon SageMaker Studio Notebooks architecture](https://aws.amazon.com/blogs/machine-learning/dive-deep-into-amazon-sagemaker-studio-notebook-architecture/)" - ] - } - ], - "metadata": { - "availableInstances": [ - { - "_defaultOrder": 0, - "_isFastLaunch": true, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 4, - "name": "ml.t3.medium", - "vcpuNum": 2 - }, - { - "_defaultOrder": 1, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 8, - "name": "ml.t3.large", - "vcpuNum": 2 - }, - { - "_defaultOrder": 2, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 16, - "name": "ml.t3.xlarge", - "vcpuNum": 4 - }, - { - "_defaultOrder": 3, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 32, - "name": "ml.t3.2xlarge", - "vcpuNum": 8 - }, - { - "_defaultOrder": 4, - "_isFastLaunch": true, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 8, - "name": "ml.m5.large", - "vcpuNum": 2 - }, - { - "_defaultOrder": 5, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 16, - "name": "ml.m5.xlarge", - 
"vcpuNum": 4 - }, - { - "_defaultOrder": 6, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 32, - "name": "ml.m5.2xlarge", - "vcpuNum": 8 - }, - { - "_defaultOrder": 7, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 64, - "name": "ml.m5.4xlarge", - "vcpuNum": 16 - }, - { - "_defaultOrder": 8, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 128, - "name": "ml.m5.8xlarge", - "vcpuNum": 32 - }, - { - "_defaultOrder": 9, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 192, - "name": "ml.m5.12xlarge", - "vcpuNum": 48 - }, - { - "_defaultOrder": 10, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 256, - "name": "ml.m5.16xlarge", - "vcpuNum": 64 - }, - { - "_defaultOrder": 11, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 384, - "name": "ml.m5.24xlarge", - "vcpuNum": 96 - }, - { - "_defaultOrder": 12, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 8, - "name": "ml.m5d.large", - "vcpuNum": 2 - }, - { - "_defaultOrder": 13, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 16, - "name": "ml.m5d.xlarge", - "vcpuNum": 4 - }, - { - "_defaultOrder": 14, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 32, - "name": "ml.m5d.2xlarge", - "vcpuNum": 8 - }, - { - "_defaultOrder": 15, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 64, - "name": "ml.m5d.4xlarge", - "vcpuNum": 16 - }, - { - "_defaultOrder": 16, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 128, - "name": "ml.m5d.8xlarge", - "vcpuNum": 32 - }, - { - "_defaultOrder": 17, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 192, - "name": "ml.m5d.12xlarge", - "vcpuNum": 48 - }, - { - "_defaultOrder": 18, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 256, - "name": "ml.m5d.16xlarge", - "vcpuNum": 64 - }, - { - "_defaultOrder": 19, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 384, - "name": "ml.m5d.24xlarge", - "vcpuNum": 96 - }, - { - "_defaultOrder": 20, - "_isFastLaunch": false, - "category": "General purpose", - "gpuNum": 0, - "hideHardwareSpecs": true, - "memoryGiB": 0, - "name": "ml.geospatial.interactive", - "supportedImageNames": [ - "sagemaker-geospatial-v1-0" - ], - "vcpuNum": 0 - }, - { - "_defaultOrder": 21, - "_isFastLaunch": true, - "category": "Compute optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 4, - "name": "ml.c5.large", - "vcpuNum": 2 - }, - { - "_defaultOrder": 22, - "_isFastLaunch": false, - "category": "Compute optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 8, - "name": "ml.c5.xlarge", - "vcpuNum": 4 - }, - { - "_defaultOrder": 23, - "_isFastLaunch": false, - "category": "Compute optimized", - 
"gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 16, - "name": "ml.c5.2xlarge", - "vcpuNum": 8 - }, - { - "_defaultOrder": 24, - "_isFastLaunch": false, - "category": "Compute optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 32, - "name": "ml.c5.4xlarge", - "vcpuNum": 16 - }, - { - "_defaultOrder": 25, - "_isFastLaunch": false, - "category": "Compute optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 72, - "name": "ml.c5.9xlarge", - "vcpuNum": 36 - }, - { - "_defaultOrder": 26, - "_isFastLaunch": false, - "category": "Compute optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 96, - "name": "ml.c5.12xlarge", - "vcpuNum": 48 - }, - { - "_defaultOrder": 27, - "_isFastLaunch": false, - "category": "Compute optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 144, - "name": "ml.c5.18xlarge", - "vcpuNum": 72 - }, - { - "_defaultOrder": 28, - "_isFastLaunch": false, - "category": "Compute optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 192, - "name": "ml.c5.24xlarge", - "vcpuNum": 96 - }, - { - "_defaultOrder": 29, - "_isFastLaunch": true, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 16, - "name": "ml.g4dn.xlarge", - "vcpuNum": 4 - }, - { - "_defaultOrder": 30, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 32, - "name": "ml.g4dn.2xlarge", - "vcpuNum": 8 - }, - { - "_defaultOrder": 31, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 64, - "name": "ml.g4dn.4xlarge", - "vcpuNum": 16 - }, - { - "_defaultOrder": 32, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 128, - "name": "ml.g4dn.8xlarge", - "vcpuNum": 32 - }, - { - "_defaultOrder": 33, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 4, - "hideHardwareSpecs": false, - "memoryGiB": 192, - "name": "ml.g4dn.12xlarge", - "vcpuNum": 48 - }, - { - "_defaultOrder": 34, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 256, - "name": "ml.g4dn.16xlarge", - "vcpuNum": 64 - }, - { - "_defaultOrder": 35, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 61, - "name": "ml.p3.2xlarge", - "vcpuNum": 8 - }, - { - "_defaultOrder": 36, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 4, - "hideHardwareSpecs": false, - "memoryGiB": 244, - "name": "ml.p3.8xlarge", - "vcpuNum": 32 - }, - { - "_defaultOrder": 37, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 8, - "hideHardwareSpecs": false, - "memoryGiB": 488, - "name": "ml.p3.16xlarge", - "vcpuNum": 64 - }, - { - "_defaultOrder": 38, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 8, - "hideHardwareSpecs": false, - "memoryGiB": 768, - "name": "ml.p3dn.24xlarge", - "vcpuNum": 96 - }, - { - "_defaultOrder": 39, - "_isFastLaunch": false, - "category": "Memory Optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 16, - "name": "ml.r5.large", - "vcpuNum": 2 - }, - { - "_defaultOrder": 40, - "_isFastLaunch": false, - "category": "Memory Optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 32, - "name": "ml.r5.xlarge", - 
"vcpuNum": 4 - }, - { - "_defaultOrder": 41, - "_isFastLaunch": false, - "category": "Memory Optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 64, - "name": "ml.r5.2xlarge", - "vcpuNum": 8 - }, - { - "_defaultOrder": 42, - "_isFastLaunch": false, - "category": "Memory Optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 128, - "name": "ml.r5.4xlarge", - "vcpuNum": 16 - }, - { - "_defaultOrder": 43, - "_isFastLaunch": false, - "category": "Memory Optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 256, - "name": "ml.r5.8xlarge", - "vcpuNum": 32 - }, - { - "_defaultOrder": 44, - "_isFastLaunch": false, - "category": "Memory Optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 384, - "name": "ml.r5.12xlarge", - "vcpuNum": 48 - }, - { - "_defaultOrder": 45, - "_isFastLaunch": false, - "category": "Memory Optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 512, - "name": "ml.r5.16xlarge", - "vcpuNum": 64 - }, - { - "_defaultOrder": 46, - "_isFastLaunch": false, - "category": "Memory Optimized", - "gpuNum": 0, - "hideHardwareSpecs": false, - "memoryGiB": 768, - "name": "ml.r5.24xlarge", - "vcpuNum": 96 - }, - { - "_defaultOrder": 47, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 16, - "name": "ml.g5.xlarge", - "vcpuNum": 4 - }, - { - "_defaultOrder": 48, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 32, - "name": "ml.g5.2xlarge", - "vcpuNum": 8 - }, - { - "_defaultOrder": 49, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 64, - "name": "ml.g5.4xlarge", - "vcpuNum": 16 - }, - { - "_defaultOrder": 50, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 128, - "name": "ml.g5.8xlarge", - "vcpuNum": 32 - }, - { - "_defaultOrder": 51, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 1, - "hideHardwareSpecs": false, - "memoryGiB": 256, - "name": "ml.g5.16xlarge", - "vcpuNum": 64 - }, - { - "_defaultOrder": 52, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 4, - "hideHardwareSpecs": false, - "memoryGiB": 192, - "name": "ml.g5.12xlarge", - "vcpuNum": 48 - }, - { - "_defaultOrder": 53, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 4, - "hideHardwareSpecs": false, - "memoryGiB": 384, - "name": "ml.g5.24xlarge", - "vcpuNum": 96 - }, - { - "_defaultOrder": 54, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 8, - "hideHardwareSpecs": false, - "memoryGiB": 768, - "name": "ml.g5.48xlarge", - "vcpuNum": 192 - }, - { - "_defaultOrder": 55, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 8, - "hideHardwareSpecs": false, - "memoryGiB": 1152, - "name": "ml.p4d.24xlarge", - "vcpuNum": 96 - }, - { - "_defaultOrder": 56, - "_isFastLaunch": false, - "category": "Accelerated computing", - "gpuNum": 8, - "hideHardwareSpecs": false, - "memoryGiB": 1152, - "name": "ml.p4de.24xlarge", - "vcpuNum": 96 - } - ], - "instance_type": "ml.t3.medium", - "kernelspec": { - "display_name": "Python 3 (Base Python 3.0)", - "language": "python", - "name": "python3__SAGEMAKER_INTERNAL__arn:aws:sagemaker:us-east-1:081325390199:image/sagemaker-base-python-310-v1" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/kendra_retriever_samples/kendra_chat_flan_xl_nb.py b/kendra_retriever_samples/kendra_chat_flan_xl_nb.py deleted file mode 100644 index ff57489..0000000 --- a/kendra_retriever_samples/kendra_chat_flan_xl_nb.py +++ /dev/null @@ -1,145 +0,0 @@ -# pylint: disable=invalid-name,line-too-long -""" -Adapted from -https://github.com/aws-samples/amazon-kendra-langchain-extensions/blob/main/samples/kendra_chat_flan_xl.py -""" - -import json -import os - -from langchain.chains import ConversationalRetrievalChain -from langchain.prompts import PromptTemplate -from langchain import SagemakerEndpoint -from langchain.llms.sagemaker_endpoint import ContentHandlerBase - -from aws_langchain.kendra_index_retriever import KendraIndexRetriever - -class bcolors: #pylint: disable=too-few-public-methods - """ - ANSI escape sequences - https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal - """ - HEADER = '\033[95m' - OKBLUE = '\033[94m' - OKCYAN = '\033[96m' - OKGREEN = '\033[92m' - WARNING = '\033[93m' - FAIL = '\033[91m' - ENDC = '\033[0m' - BOLD = '\033[1m' - UNDERLINE = '\033[4m' - -MAX_HISTORY_LENGTH = 5 - -def build_chain(): - """ - Builds the LangChain chain - """ - region = os.environ["AWS_REGION"] - kendra_index_id = os.environ["KENDRA_INDEX_ID"] - endpoint_name = os.environ["FLAN_XL_ENDPOINT"] - - class ContentHandler(ContentHandlerBase): - """ - Handler class to transform input and ouput - into a format that the SageMaker Endpoint can understand - """ - content_type = "application/json" - accepts = "application/json" - - def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str = json.dumps({"text_inputs": prompt, **model_kwargs}) - return input_str.encode('utf-8') - - def transform_output(self, output: bytes) -> str: - response_json = json.loads(output.read().decode("utf-8")) - return response_json["generated_texts"][0] - - content_handler = ContentHandler() - - # Initialize LLM hosted on a SageMaker endpoint - # https://python.langchain.com/en/latest/modules/models/llms/integrations/sagemaker.html - llm=SagemakerEndpoint( - endpoint_name=endpoint_name, - region_name="us-east-1", - model_kwargs={"temperature":1e-10, "max_length": 500}, - content_handler=content_handler - ) - - # Initialize Kendra index retriever - retriever = KendraIndexRetriever( - kendraindex=kendra_index_id, - awsregion=region, - return_source_documents=True - ) - - # Define prompt template - # https://python.langchain.com/en/latest/modules/prompts/prompt_templates.html - prompt_template = """ -The following is a friendly conversation between a human and an AI. -The AI is talkative and provides lots of specific details from its context. -If the AI does not know the answer to a question, it truthfully says it -does not know. -{context} -Instruction: Based on the above documents, provide a detailed answer for, -{question} Answer "don't know" if not present in the document. 
Solution: -""" - qa_prompt = PromptTemplate( - template=prompt_template, input_variables=["context", "question"] - ) - - # Initialize QA chain with chat history - # https://python.langchain.com/en/latest/modules/chains/index_examples/chat_vector_db.html - qa = ConversationalRetrievalChain.from_llm( # - llm=llm, - retriever=retriever, - qa_prompt=qa_prompt, - return_source_documents=True - ) - - return qa - -def run_chain(chain, prompt: str, history=None): - """ - Runs the Q&A chain given a user prompt and chat history - """ - if history is None: - history = [] - return chain({"question": prompt, "chat_history": history}) - -def prompt_user(): - """ - Helper function to get user input - """ - print(f"{bcolors.OKBLUE}Hello! How can I help you?{bcolors.ENDC}") - print(f"{bcolors.OKCYAN}Ask a question, start a New search: or Stop cell execution to exit.{bcolors.ENDC}") - return input(">") - -if __name__ == "__main__": - # Initialize chat history - chat_history = [] - - # Initialize Q&A chain - qa_chain = build_chain() - - try: - while query := prompt_user(): - # Process user input in case of a new search - if query.strip().lower().startswith("new search:"): - query = query.strip().lower().replace("new search:", "") - chat_history = [] - if len(chat_history) == MAX_HISTORY_LENGTH: - chat_history.pop(0) - - # Show answer and keep a record - result = run_chain(qa_chain, query, chat_history) - chat_history.append((query, result["answer"])) - print(f"{bcolors.OKGREEN}{result['answer']}{bcolors.ENDC}") - - # Show sources - if 'source_documents' in result: - print(bcolors.OKGREEN + 'Sources:') - for doc in result['source_documents']: - print(f"+ {doc.metadata['source']}") - except KeyboardInterrupt: - pass diff --git a/kendra_retriever_samples/skip_kernel_extension.py b/kendra_retriever_samples/skip_kernel_extension.py deleted file mode 100644 index b688f3a..0000000 --- a/kendra_retriever_samples/skip_kernel_extension.py +++ /dev/null @@ -1,22 +0,0 @@ -""" -Custom kernel extension to add %%skip magic and control cell execution - -Adapted from -https://github.com/ipython/ipython/issues/11582 -https://stackoverflow.com/questions/26494747/simple-way-to-choose-which-cells-to-run-in-ipython-notebook-during-run-all -""" - -def skip(line, cell=None): - '''Skips execution of the current line/cell if line evaluates to True.''' - if eval(line): - return - - get_ipython().run_cell(cell) - -def load_ipython_extension(shell): - '''Registers the skip magic when the extension loads.''' - shell.register_magic_function(skip, 'line_cell') - -def unload_ipython_extension(shell): - '''Unregisters the skip magic when the extension unloads.''' - del shell.magics_manager.magics['cell']['skip'] \ No newline at end of file From 2b33fb676785bd1369da61efc533e31db36b16d2 Mon Sep 17 00:00:00 2001 From: Mithil Shah Date: Wed, 16 Aug 2023 14:11:54 +1000 Subject: [PATCH 22/22] boto3 version --- kendra_retriever_samples/README.md | 40 +++++++++++++------ .../kendra_chat_anthropic.py | 2 +- .../kendra_chat_falcon_40b.py | 2 +- .../kendra_chat_flan_xl.py | 2 +- .../kendra_chat_flan_xxl.py | 2 +- .../kendra_chat_llama_2.py | 2 +- .../kendra_retriever_anthropic.py | 3 +- .../kendra_retriever_falcon_40b.py | 2 +- .../kendra_retriever_flan_xl.py | 2 +- .../kendra_retriever_flan_xxl.py | 3 +- .../kendra_retriever_open_ai.py | 2 +- kendra_retriever_samples/requirements.txt | 2 +- 12 files changed, 39 insertions(+), 25 deletions(-) diff --git a/kendra_retriever_samples/README.md b/kendra_retriever_samples/README.md index e95d4ba..2110deb 
100644 --- a/kendra_retriever_samples/README.md +++ b/kendra_retriever_samples/README.md @@ -31,32 +31,46 @@ conda env create -f environment.yml ``` ## Running samples -Ensure that the environment variables are set for the aws region, kendra index id and the provider/model used by the sample. -For example, for running the `kendra_chat_flan_xl.py` sample, these environment variables must be set: AWS_REGION, KENDRA_INDEX_ID -and FLAN_XL_ENDPOINT. +Before you run a sample, you need to deploy a Large Language Model (or get an API key if you are using Anthropic or OpenAI). The samples in this repository have been tested on models deployed using SageMaker JumpStart. The model IDs for the LLMs are specified in the table below; a minimal deployment sketch is shown after this section. + + +| Model name | env var name | Jumpstart model id | streamlit provider name | | -----------| -------- | ------------------ | ----------------- | +| Flan XL | FLAN_XL_ENDPOINT | huggingface-text2text-flan-t5-xl | flanxl | +| Flan XXL | FLAN_XXL_ENDPOINT | huggingface-text2text-flan-t5-xxl | flanxxl | +| Falcon 40B instruct | FALCON_40B_ENDPOINT | huggingface-llm-falcon-40b-instruct-bf16 | falcon40b | +| Llama2 70B instruct | LLAMA_2_ENDPOINT | meta-textgeneration-llama-2-70b-f | llama2 | + + +After deploying the LLM, set up the environment variables for the Kendra index ID, the AWS region and the endpoint name (or the API key for an external provider). + +For example, to run the `kendra_chat_flan_xl.py` sample, these environment variables must be set: AWS_REGION, KENDRA_INDEX_ID and FLAN_XL_ENDPOINT. + +You can use commands as below to set the environment variables. Only set the environment variable for the provider that you are using. For example, if you are using Flan XL, only set FLAN_XL_ENDPOINT. There is no need to set the other endpoints and keys. -You can use commands as below to set the environment variables. ```bash export AWS_REGION="" export KENDRA_INDEX_ID="" -export FLAN_XL_ENDPOINT="" -export FLAN_XXL_ENDPOINT="" -export OPENAI_API_KEY="" -export ANTHROPIC_API_KEY="" -export FALCON_40B_ENDPOINT="" +export FLAN_XL_ENDPOINT="" # only if you are using FLAN_XL +export FLAN_XXL_ENDPOINT="" # only if you are using FLAN_XXL +export FALCON_40B_ENDPOINT="" # only if you are using Falcon as the endpoint +export LLAMA_2_ENDPOINT="" # only if you are using Llama 2 as the endpoint + +export OPENAI_API_KEY="" # only if you are using OpenAI as the endpoint +export ANTHROPIC_API_KEY="" # only if you are using Anthropic as the endpoint ``` + ### Running samples from the streamlit app The samples directory is bundled with an `app.py` file that can be run as a web app using streamlit. ```bash -streamlit run app.py anthropic +streamlit run app.py llama2 ``` -The above command will run the `kendra_chat_anthropic` as the LLM chain. In order to run a different chain, pass a different provider, for example for running the `open_ai` chain run this command `streamlit run app.py openai`. +The above command will run `kendra_chat_llama_2` as the LLM chain. To run a different chain, pass a different provider; for example, to run the `open_ai` chain, run `streamlit run app.py openai`. Use the 'streamlit provider name' column in the table above to find the provider name. 
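As referenced above, deploying one of the JumpStart models from the table takes only a few lines of Python. The following is a minimal sketch, assuming `sagemaker>=2.154.0` and an execution role with SageMaker permissions; the Flan XL model ID comes from the table, and any other row can be swapped in:

```python
# Minimal sketch (not part of this patch): deploy a JumpStart model
# and expose its endpoint name the way the samples expect.
import os

from sagemaker.jumpstart.model import JumpStartModel

# Model ID taken from the table above; swap in any other row
model = JumpStartModel(model_id="huggingface-text2text-flan-t5-xl")

# Provisions a real endpoint and may take several minutes
predictor = model.deploy()

# The samples read the endpoint name from the matching env var
os.environ["FLAN_XL_ENDPOINT"] = predictor.endpoint_name
print(predictor.endpoint_name)
```

Remember to delete the endpoint (`predictor.delete_endpoint()`) when you are done, to avoid ongoing charges.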
+ -Here is the list of providers: -    'openai','anthropic','flanxl','flanxxl','falcon40b','llama2' ### Running samples from the command line ```bash diff --git a/kendra_retriever_samples/kendra_chat_anthropic.py b/kendra_retriever_samples/kendra_chat_anthropic.py index 856a8a6..f046027 100644 --- a/kendra_retriever_samples/kendra_chat_anthropic.py +++ b/kendra_retriever_samples/kendra_chat_anthropic.py @@ -25,7 +25,7 @@ def build_chain(): llm = Anthropic(temperature=0, anthropic_api_key=ANTHROPIC_API_KEY, max_tokens_to_sample = 512) - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) prompt_template = """ diff --git a/kendra_retriever_samples/kendra_chat_falcon_40b.py b/kendra_retriever_samples/kendra_chat_falcon_40b.py index 38e297d..72c4bac 100644 --- a/kendra_retriever_samples/kendra_chat_falcon_40b.py +++ b/kendra_retriever_samples/kendra_chat_falcon_40b.py @@ -57,7 +57,7 @@ def transform_output(self, output: bytes) -> str: content_handler=content_handler ) - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) prompt_template = """ The following is a friendly conversation between a human and an AI. diff --git a/kendra_retriever_samples/kendra_chat_flan_xl.py b/kendra_retriever_samples/kendra_chat_flan_xl.py index 18d828a..3f8ee34 100644 --- a/kendra_retriever_samples/kendra_chat_flan_xl.py +++ b/kendra_retriever_samples/kendra_chat_flan_xl.py @@ -46,7 +46,7 @@ def transform_output(self, output: bytes) -> str: content_handler=content_handler ) - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) prompt_template = """ The following is a friendly conversation between a human and an AI. diff --git a/kendra_retriever_samples/kendra_chat_flan_xxl.py b/kendra_retriever_samples/kendra_chat_flan_xxl.py index 12cde86..1eb3fed 100644 --- a/kendra_retriever_samples/kendra_chat_flan_xxl.py +++ b/kendra_retriever_samples/kendra_chat_flan_xxl.py @@ -46,7 +46,7 @@ def transform_output(self, output: bytes) -> str: content_handler=content_handler ) - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) prompt_template = """ The following is a friendly conversation between a human and an AI. diff --git a/kendra_retriever_samples/kendra_chat_llama_2.py b/kendra_retriever_samples/kendra_chat_llama_2.py index ea9833e..7c2d527 100644 --- a/kendra_retriever_samples/kendra_chat_llama_2.py +++ b/kendra_retriever_samples/kendra_chat_llama_2.py @@ -55,7 +55,7 @@ def transform_output(self, output: bytes) -> str: content_handler=content_handler, ) - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) prompt_template = """ The following is a friendly conversation between a human and an AI. 
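Every chat sample above receives the same one-line change: the Kendra retriever is pinned to an explicit `region_name` instead of falling back to the default boto3 session region, which avoids querying an index in the wrong region when the shell's default differs from the one hosting Kendra. Below is a minimal sketch of the resulting pattern, using the environment variable names from the README; the `top_k` value and the query string are illustrative only:

```python
# Sketch of the retriever pattern the samples converge on in this patch
import os

from langchain.retrievers import AmazonKendraRetriever

region = os.environ["AWS_REGION"]
kendra_index_id = os.environ["KENDRA_INDEX_ID"]

retriever = AmazonKendraRetriever(
    index_id=kendra_index_id,
    region_name=region,  # the one-line change applied across the samples
    top_k=3,             # illustrative; the samples keep the default
)

# Each returned Document carries the Kendra excerpt and source metadata
for doc in retriever.get_relevant_documents("What is Amazon Kendra?"):
    print(doc.metadata.get("source"), "-", doc.page_content[:80])
```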
diff --git a/kendra_retriever_samples/kendra_retriever_anthropic.py b/kendra_retriever_samples/kendra_retriever_anthropic.py index 2344398..d943e18 100644 --- a/kendra_retriever_samples/kendra_retriever_anthropic.py +++ b/kendra_retriever_samples/kendra_retriever_anthropic.py @@ -12,7 +12,8 @@ def build_chain(): llm = Anthropic(temperature=0, anthropic_api_key=ANTHROPIC_API_KEY) - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) prompt_template = """ diff --git a/kendra_retriever_samples/kendra_retriever_falcon_40b.py b/kendra_retriever_samples/kendra_retriever_falcon_40b.py index bb10db9..79860cb 100644 --- a/kendra_retriever_samples/kendra_retriever_falcon_40b.py +++ b/kendra_retriever_samples/kendra_retriever_falcon_40b.py @@ -35,7 +35,7 @@ def transform_output(self, output: bytes) -> str: content_handler=content_handler ) - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) prompt_template = """ The following is a friendly conversation between a human and an AI. diff --git a/kendra_retriever_samples/kendra_retriever_flan_xl.py b/kendra_retriever_samples/kendra_retriever_flan_xl.py index 15fda0a..7c3f680 100644 --- a/kendra_retriever_samples/kendra_retriever_flan_xl.py +++ b/kendra_retriever_samples/kendra_retriever_flan_xl.py @@ -34,7 +34,7 @@ def transform_output(self, output: bytes) -> str: content_handler=content_handler ) - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) prompt_template = """ The following is a friendly conversation between a human and an AI. diff --git a/kendra_retriever_samples/kendra_retriever_flan_xxl.py b/kendra_retriever_samples/kendra_retriever_flan_xxl.py index dd8498f..390d0ce 100644 --- a/kendra_retriever_samples/kendra_retriever_flan_xxl.py +++ b/kendra_retriever_samples/kendra_retriever_flan_xxl.py @@ -34,8 +34,7 @@ def transform_output(self, output: bytes) -> str: model_kwargs={"temperature":1e-10, "max_length": 500}, content_handler=content_handler ) - - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) prompt_template = """ The following is a friendly conversation between a human and an AI. diff --git a/kendra_retriever_samples/kendra_retriever_open_ai.py b/kendra_retriever_samples/kendra_retriever_open_ai.py index d370684..95b4fc0 100644 --- a/kendra_retriever_samples/kendra_retriever_open_ai.py +++ b/kendra_retriever_samples/kendra_retriever_open_ai.py @@ -11,7 +11,7 @@ def build_chain(): llm = OpenAI(batch_size=5, temperature=0, max_tokens=300) - retriever = AmazonKendraRetriever(index_id=kendra_index_id) + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) prompt_template = """ The following is a friendly conversation between a human and an AI. diff --git a/kendra_retriever_samples/requirements.txt b/kendra_retriever_samples/requirements.txt index cd34738..8c93dfc 100644 --- a/kendra_retriever_samples/requirements.txt +++ b/kendra_retriever_samples/requirements.txt @@ -1,5 +1,5 @@ langchain==0.0.263 -boto3>=1.26.159 +boto3>=1.28.27 openai anthropic streamlit
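Taken together, the prompt truncation added to the Falcon sample earlier in this series and the explicit retriever region above leave the Falcon wiring looking roughly like the sketch below. The `prompt[:1023]` guard mirrors the patch; the response shape in `transform_output` and the `model_kwargs` values are assumptions based on typical `huggingface-llm` deployments, not taken verbatim from the repository:

```python
# Hedged sketch of the Falcon 40B endpoint wiring after this series
import json
import os

from langchain import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler


class ContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: dict) -> bytes:
        # Guard from the patch: keep the prompt within the endpoint's input limit
        prompt = prompt[:1023]
        input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs})
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        response_json = json.loads(output.read().decode("utf-8"))
        # Assumed huggingface-llm response shape: [{"generated_text": "..."}]
        return response_json[0]["generated_text"]


llm = SagemakerEndpoint(
    endpoint_name=os.environ["FALCON_40B_ENDPOINT"],
    region_name=os.environ["AWS_REGION"],
    model_kwargs={"max_new_tokens": 512, "temperature": 0.8},  # assumed values
    content_handler=ContentHandler(),
)

print(llm("What is Amazon Kendra?"))
```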