
Llama-2-chat integration #9

Closed
wants to merge 30 commits into from
Changes from 14 commits
c004d7b
llama-2-chat integration [wip]
eryk-dsai Aug 31, 2023
3337d6a
fixing problem with private pipeline
eryk-dsai Aug 31, 2023
26e10fc
_generate returns ChatResult
eryk-dsai Sep 1, 2023
2aba4cd
batch calls example in notebook, handling chat messages
eryk-dsai Sep 1, 2023
82f71f1
_format_messages_as_text docstring
eryk-dsai Sep 1, 2023
933f5f4
you can pass stop words now
eryk-dsai Sep 1, 2023
36519e0
format_messages_as_text test
eryk-dsai Sep 1, 2023
9a92e08
formatter
eryk-dsai Sep 1, 2023
cc53251
fix lint issues
eryk-dsai Sep 1, 2023
6a0cd87
removal of redundant notebook cell
eryk-dsai Sep 1, 2023
cc578b5
refactor: update naming to indicate Hugging Face usage
eryk-dsai Sep 4, 2023
cb33d48
small refactor
eryk-dsai Sep 4, 2023
440571d
fix lint errors, running formatter
eryk-dsai Sep 4, 2023
f860326
moving stopping criteria class out of function, correct typing
eryk-dsai Sep 4, 2023
9b6e79d
code review suggestions
eryk-dsai Sep 4, 2023
900d55c
run formatter and lint
eryk-dsai Sep 4, 2023
3b9c030
StoppingCriteria are correctly placed on the same device as pipeline
eryk-dsai Sep 5, 2023
9715b81
Merge branch 'llama2-chat' of https://github.com/deepsense-ai/langcha…
eryk-dsai Sep 5, 2023
3885de5
run formatter, lint
eryk-dsai Sep 5, 2023
814131d
removal of the redundant notebook cell
eryk-dsai Sep 5, 2023
bd6e2fe
moved StoppingCriteria import to method
eryk-dsai Sep 5, 2023
35b5e09
fixing type annotation
eryk-dsai Sep 5, 2023
1228bfc
fixing Enum tests
eryk-dsai Sep 5, 2023
926c02f
Editing the huggingface llama 2 notebook
eryk-dsai Sep 5, 2023
8a8a03a
Merge branch 'master' into llama2-chat
eryk-dsai Sep 5, 2023
964b579
typos, better name for custom StoppingCriteria subclass
eryk-dsai Sep 13, 2023
87597a1
Generic Hugging Face Pipeline Chat Model
eryk-dsai Sep 13, 2023
7756700
Merge branch 'langchain-ai:master' into llama2-chat
eryk-dsai Oct 6, 2023
d9e9ef3
simplifying HF Chat Model, by making use of HF Chat Templates
eryk-dsai Oct 10, 2023
cf203b9
removing incorrect check from validate_environment method
eryk-dsai Oct 10, 2023
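Several of the commits above (`_format_messages_as_text docstring`, `format_messages_as_text test`) concern rendering LangChain chat messages into Llama-2's prompt format, before the later switch to Hugging Face chat templates. A minimal sketch of that kind of formatting, assuming the standard Llama-2 `[INST]`/`<<SYS>>` tags and using hypothetical `(role, content)` tuples in place of LangChain message objects (this is an illustration, not the PR's exact implementation):

```python
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"


def format_messages_as_text(messages):
    """Render (role, content) tuples ('system'/'human'/'ai') into a single
    Llama-2 chat prompt string. Sketch only: the system prompt is folded
    into the first user turn, per the Llama-2 template."""
    text = ""
    system = ""
    for role, content in messages:
        if role == "system":
            system = B_SYS + content + E_SYS
        elif role == "human":
            text += f"<s>{B_INST} {system}{content} {E_INST}"
            system = ""  # only the first user turn carries the system prompt
        elif role == "ai":
            text += f" {content} </s>"
    return text
```

The later commit `simplifying HF Chat Model, by making use of HF Chat Templates` replaces hand-rolled formatting like this with the tokenizer's own chat template (`tokenizer.apply_chat_template`), so the prompt layout stays in sync with the model.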
296 changes: 296 additions & 0 deletions docs/extras/integrations/chat/huggingface_llama2.ipynb
@@ -0,0 +1,296 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Llama-2-Chat Model from Hugging Face"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Hugging Face imports:\n",
"import torch\n",
"from transformers import (\n",
" AutoModelForCausalLM,\n",
" AutoTokenizer,\n",
" BitsAndBytesConfig,\n",
" pipeline,\n",
")\n",
"\n",
"# LangChain imports:\n",
"from langchain.chat_models import ChatLlama2Hf\n",
"from langchain.schema import AIMessage, HumanMessage, SystemMessage"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook assumes that you have been granted access to the Llama 2 models on the Hugging Face Hub. To use the model locally, you need to be [logged in](https://huggingface.co/docs/huggingface_hub/quick-start#login) with a Hugging Face account."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fdfe7c3faf0c40d0bac0fd22fd9ebd38",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from huggingface_hub import login\n",
"login()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"model_name = \"meta-llama/Llama-2-7b-chat-hf\""
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"bnb_config = BitsAndBytesConfig(\n",
" load_in_4bit=True,\n",
" bnb_4bit_quant_type=\"nf4\",\n",
" bnb_4bit_use_double_quant=True,\n",
" bnb_4bit_compute_dtype=torch.bfloat16,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4c3b0838de6140019682c5e5d17f1f37",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
"model_4bit = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config, device_map=\"auto\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"pipe = pipeline(\n",
" \"text-generation\",\n",
" model=model_4bit,\n",
" tokenizer=tokenizer,\n",
" torch_dtype=torch.float16,\n",
" device_map=\"auto\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"chat = ChatLlama2Hf(pipeline=pipe)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Generation kwargs:\n",
"pipeline_kwargs = {\n",
" \"do_sample\": True,\n",
" \"top_p\": 0.95,\n",
" \"temperature\": 0.7,\n",
" \"eos_token_id\": tokenizer.eos_token_id,\n",
" \"max_length\": 256, \n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Single calls:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=' Sure! Here\\'s the translation of \"I love programming\" from English to French:\\nJe adore le programming.\\n\\nI hope that helps! Let me know if you have any other sentences you\\'d like me to translate.', additional_kwargs={}, example=False)"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant that translates English to French.\"\n",
" ),\n",
" HumanMessage(\n",
" content=\"Translate this sentence from English to French. I love programming.\"\n",
" ),\n",
"]\n",
"chat(messages, **pipeline_kwargs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Single calls with stop words"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\" Of course, I'd be happy to help! Artificial\", additional_kwargs={}, example=False)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"messages = [\n",
" SystemMessage(\n",
" content=\"You are a helpful assistant.\"\n",
" ),\n",
" HumanMessage(\n",
" content=\"Tell me the history of AI.\"\n",
" ),\n",
"]\n",
"chat(messages, stop=[\"Artificial\"], **pipeline_kwargs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Batch calls:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"LLMResult(generations=[[ChatGeneration(text=' Great! \"Programming\" in English can be translated to \"le programming\" in French.\\n\\nSo, you love programming? \"Aimez-vous le programming\" in French.', generation_info=None, message=AIMessage(content=' Great! \"Programming\" in English can be translated to \"le programming\" in French.\\n\\nSo, you love programming? \"Aimez-vous le programming\" in French.', additional_kwargs={}, example=False))], [ChatGeneration(text=' Bonjour! Je suis heureux de vous aider avec la translation de votre phrase en français.\\n\\nVous aimez l\\'intelligence artificielle.\\n\\n(Note: I used the phrase \"Bonjour!\" to greet you in French, as it is a common way to start a conversation in France. \"Je suis heureux de vous aider\" means \"I am happy to help you\" in French.)', generation_info=None, message=AIMessage(content=' Bonjour! Je suis heureux de vous aider avec la translation de votre phrase en français.\\n\\nVous aimez l\\'intelligence artificielle.\\n\\n(Note: I used the phrase \"Bonjour!\" to greet you in French, as it is a common way to start a conversation in France. \"Je suis heureux de vous aider\" means \"I am happy to help you\" in French.)', additional_kwargs={}, example=False))]], llm_output={}, run=[RunInfo(run_id=UUID('844a9416-e941-478b-ac61-82b5c7c15fb7')), RunInfo(run_id=UUID('57636053-2e2c-41a4-89ad-7ef70d38fe12'))])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"batch_messages = [\n",
" [\n",
" SystemMessage(content=\"You are a helpful assistant that translates English to French.\"),\n",
" HumanMessage(content=\"I love programming.\")\n",
" ],\n",
" [\n",
" SystemMessage(content=\"You are a helpful assistant that translates English to French.\"),\n",
" HumanMessage(content=\"I love artificial intelligence.\")\n",
" ],\n",
"]\n",
"result = chat.generate(batch_messages)\n",
"result"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.12 ('langchain_venv': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "3372ef96e068313d34c91eab0f20d815c93d37110de821968e5d598f73bfb74c"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
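The stop-word support added in commit `you can pass stop words now` and demonstrated in the notebook (`stop=["Artificial"]`, with the reply ending right at the word `Artificial`) amounts to halting output at the first stop word and keeping the text up to and including it. A sketch of that behavior on plain strings, as an illustration only; the PR itself implements this as a `transformers` `StoppingCriteria` that inspects token IDs during generation:

```python
def truncate_at_stop(text, stop):
    """Cut generated text at the earliest stop word, keeping the stop word
    itself -- matching the notebook output, which ends exactly at
    'Artificial'. Returns the text unchanged if no stop word occurs."""
    best_idx, best_len = len(text), 0
    for s in stop:
        idx = text.find(s)
        if idx != -1 and idx < best_idx:
            best_idx, best_len = idx, len(s)
    return text[: best_idx + best_len]
```

Doing this inside a `StoppingCriteria` (as the commits `moving stopping criteria class out of function` and `StoppingCriteria are correctly placed on the same device as pipeline` indicate) halts the model early instead of trimming afterwards, which also saves generation time.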
2 changes: 2 additions & 0 deletions libs/langchain/langchain/chat_models/__init__.py
@@ -23,6 +23,7 @@
from langchain.chat_models.ernie import ErnieBotChat
from langchain.chat_models.fake import FakeListChatModel
from langchain.chat_models.google_palm import ChatGooglePalm
from langchain.chat_models.huggingface_llama2 import ChatLlama2Hf
from langchain.chat_models.human import HumanInputChatModel
from langchain.chat_models.jinachat import JinaChat
from langchain.chat_models.litellm import ChatLiteLLM
@@ -41,6 +42,7 @@
"ChatGooglePalm",
"ChatMLflowAIGateway",
"ChatOllama",
"ChatLlama2Hf",
"ChatVertexAI",
"JinaChat",
"HumanInputChatModel",