Llama-2-chat integration #9

Closed · wants to merge 30 commits

Commits (30)
c004d7b
llama-2-chat integration [wip]
eryk-dsai Aug 31, 2023
3337d6a
fixing problem with private pipeline
eryk-dsai Aug 31, 2023
26e10fc
_generate returns ChatResult
eryk-dsai Sep 1, 2023
2aba4cd
batch calls example in notebook, handling chat messages
eryk-dsai Sep 1, 2023
82f71f1
_format_messages_as_text docstring
eryk-dsai Sep 1, 2023
933f5f4
you can pass stop words now
eryk-dsai Sep 1, 2023
36519e0
format_messages_as_text test
eryk-dsai Sep 1, 2023
9a92e08
formatter
eryk-dsai Sep 1, 2023
cc53251
fix lint issues
eryk-dsai Sep 1, 2023
6a0cd87
removal of redundant notebook cell
eryk-dsai Sep 1, 2023
cc578b5
refactor: update naming to indicate Hugging Face usage
eryk-dsai Sep 4, 2023
cb33d48
small refactor
eryk-dsai Sep 4, 2023
440571d
fix lint errors, running formatter
eryk-dsai Sep 4, 2023
f860326
moving stopping criteria class out of function, correct typing
eryk-dsai Sep 4, 2023
9b6e79d
code review suggestions
eryk-dsai Sep 4, 2023
900d55c
run formatter and lint
eryk-dsai Sep 4, 2023
3b9c030
StoppingCriteria are correctly placed on the same device as pipeline
eryk-dsai Sep 5, 2023
9715b81
Merge branch 'llama2-chat' of https://github.com/deepsense-ai/langcha…
eryk-dsai Sep 5, 2023
3885de5
run formatter, lint
eryk-dsai Sep 5, 2023
814131d
removal of the redundant notebook cell
eryk-dsai Sep 5, 2023
bd6e2fe
moved StoppingCriteria import to method
eryk-dsai Sep 5, 2023
35b5e09
fixing type annotation
eryk-dsai Sep 5, 2023
1228bfc
fixing Enum tests
eryk-dsai Sep 5, 2023
926c02f
Editing the huggingface llama 2 notebook
eryk-dsai Sep 5, 2023
8a8a03a
Merge branch 'master' into llama2-chat
eryk-dsai Sep 5, 2023
964b579
typos, better name for custom StoppingCriteria subclass
eryk-dsai Sep 13, 2023
87597a1
Generic Hugging Face Pipeline Chat Model
eryk-dsai Sep 13, 2023
7756700
Merge branch 'langchain-ai:master' into llama2-chat
eryk-dsai Oct 6, 2023
d9e9ef3
simplifying HF Chat Model, by making use of HF Chat Templates
eryk-dsai Oct 10, 2023
cf203b9
removing incorrect check from validate_environment method
eryk-dsai Oct 10, 2023
298 changes: 298 additions & 0 deletions docs/extras/integrations/chat/huggingface_llama2.ipynb
@@ -0,0 +1,298 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Llama-2-Chat Model from Hugging Face"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Hugging Face imports:\n",
"import torch\n",
"from transformers import (\n",
" AutoModelForCausalLM,\n",
" AutoTokenizer,\n",
" BitsAndBytesConfig,\n",
" pipeline,\n",
")\n",
"\n",
"# LangChain imports:\n",
"from langchain.chat_models import ChatLlama2Hf\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook assumes that you were granted with access to the Llama 2 models in the Hugging Face models hub. To use the model locally, you need to be [logged in](https://huggingface.co/docs/huggingface_hub/quick-start#login) with a Hugging Face account."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4b4b6e0544c94c248f85fbc6103efbbb",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from huggingface_hub import login\n",
"login()"
]
},
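{
"cell_type": "markdown",
"metadata": {},
"source": [
"If an interactive login is inconvenient (e.g. in a script or CI job), `login` also accepts a token directly. The cell below is a sketch that assumes your access token is exported as the `HUGGING_FACE_HUB_TOKEN` environment variable."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Non-interactive alternative (sketch): pass a token read from the environment.\n",
"# Assumes HUGGING_FACE_HUB_TOKEN is set in your shell.\n",
"import os\n",
"\n",
"login(token=os.environ[\"HUGGING_FACE_HUB_TOKEN\"])"
]
},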
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"model_name = \"meta-llama/Llama-2-7b-chat-hf\""
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"bnb_config = BitsAndBytesConfig(\n",
" load_in_4bit=True,\n",
" bnb_4bit_quant_type=\"nf4\",\n",
" bnb_4bit_use_double_quant=True,\n",
" bnb_4bit_compute_dtype=torch.bfloat16,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "5d912b06f1954ab287ea0f73828422a6",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"model_4bit = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map=\"auto\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"pipe = pipeline(\n",
" \"text-generation\",\n",
" model=model_4bit,\n",
" tokenizer=tokenizer,\n",
" torch_dtype=torch.float16,\n",
" device_map=\"auto\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"chat = ChatLlama2Hf(pipeline=pipe)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Generation kwargs:\n",
"pipeline_kwargs = {\n",
" \"do_sample\": True,\n",
" \"top_p\": 0.95,\n",
" \"temperature\": 0.7,\n",
" \"eos_token_id\": tokenizer.eos_token_id,\n",
" \"max_length\": 256, \n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Single calls:"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Sure! Here is the translation of \"I love programming\" from English to French:\n",
"\n",
"Je suis passionné par le programming.\n",
"\n",
"I hope this helps! Let me know if you have any other sentences you would like me to translate.\n"
]
}
],
"source": [
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant that translates English to French.\"\n",
" ),\n",
" HumanMessage(\n",
" content=\"Translate this sentence from English to French. I love programming.\"\n",
" ),\n",
"]\n",
"result = chat(messages, **pipeline_kwargs)\n",
"print(result.content)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Single calls with stop words"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Of course! Artificial\n"
]
}
],
"source": [
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant.\"\n",
" ),\n",
" HumanMessage(\n",
" content=\"Tell me the history of AI.\"\n",
" ),\n",
"]\n",
"result = chat(messages, stop=[\"Artificial\", \"Inteligence\"], **pipeline_kwargs)\n",
"print(result.content)"
]
},
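{
"cell_type": "markdown",
"metadata": {},
"source": [
"Under the hood, stop words are enforced with a custom `transformers.StoppingCriteria` subclass (see the commit history). The cell below is a minimal sketch of that idea, assuming a decode-and-match approach; the class name and details are illustrative, not the PR's actual implementation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from transformers import StoppingCriteria, StoppingCriteriaList\n",
"\n",
"\n",
"class StopOnWords(StoppingCriteria):  # illustrative name, not the PR's class\n",
"    \"\"\"Stop generation once any stop word appears in the decoded text.\"\"\"\n",
"\n",
"    def __init__(self, tokenizer, stop_words):\n",
"        self.tokenizer = tokenizer\n",
"        self.stop_words = stop_words\n",
"\n",
"    def __call__(self, input_ids, scores, **kwargs) -> bool:\n",
"        # Decode the sequence so far and look for any stop word.\n",
"        # (A production version would check only the newly generated tokens,\n",
"        # so stop words in the prompt do not end generation early.)\n",
"        text = self.tokenizer.decode(input_ids[0], skip_special_tokens=True)\n",
"        return any(word in text for word in self.stop_words)\n",
"\n",
"\n",
"criteria = StoppingCriteriaList([StopOnWords(tokenizer, [\"Artificial\"])])"
]
},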
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Batch calls:"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"batch_messages = [\n",
" [\n",
" SystemMessage(content=\"You are a helpful assistant that translates English to French.\"),\n",
" HumanMessage(content=\"I love programming.\")\n",
" ],\n",
" [\n",
" SystemMessage(content=\"You are a helpful assistant that translates English to French.\"),\n",
" HumanMessage(content=\"I love artificial intelligence.\")\n",
" ],\n",
"]\n",
"result = chat.generate(batch_messages)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Response #0:\n",
" Great! \"Programming\" is translated to French as \"programmation\". So, \"I love programmation\" would be the correct way to express your sentiment in French.\n",
"\n",
"Response #1:\n",
" Bonjour! Je suis heureux d'être votre assistant de traduction. Vous aimez l'intelligence artificielle, n'est-ce pas? (You love artificial intelligence, don't you?)\n",
"In French, the phrase \"intelligence artificielle\" can be translated to \"artificial intelligence\" in English.\n",
"\n"
]
}
],
"source": [
"for i, generation in enumerate(result.generations):\n",
" print(f\"Response #{i}:\\n{generation[0].text}\", end=\"\\n\\n\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.12 ('langchain_venv': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "d1d3a3c58a58885896c5459933a599607cdbb9917d7e1ad7516c8786c51f2dd2"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
2 changes: 2 additions & 0 deletions libs/langchain/langchain/chat_models/__init__.py
@@ -23,6 +23,7 @@
 from langchain.chat_models.ernie import ErnieBotChat
 from langchain.chat_models.fake import FakeListChatModel
 from langchain.chat_models.google_palm import ChatGooglePalm
+from langchain.chat_models.huggingface_llama2 import ChatLlama2Hf
 from langchain.chat_models.human import HumanInputChatModel
 from langchain.chat_models.jinachat import JinaChat
 from langchain.chat_models.litellm import ChatLiteLLM
@@ -41,6 +42,7 @@
"ChatGooglePalm",
"ChatMLflowAIGateway",
"ChatOllama",
"ChatLlama2Hf",
"ChatVertexAI",
"JinaChat",
"HumanInputChatModel",