diff --git a/notebooks/254-llm-chatbot/254-llm-chatbot.ipynb b/notebooks/254-llm-chatbot/254-llm-chatbot.ipynb
index 1d6eab48aea..91a3aec6099 100644
--- a/notebooks/254-llm-chatbot/254-llm-chatbot.ipynb
+++ b/notebooks/254-llm-chatbot/254-llm-chatbot.ipynb
@@ -129,7 +129,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "169978da6d5942bd8146676f3bf5db8b",
+ "model_id": "1e8ca46ac6734f8c816a14cbe46964ce",
"version_major": 2,
"version_minor": 0
},
@@ -223,15 +223,15 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "2023-12-12 21:55:41.474562: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
- "2023-12-12 21:55:41.476575: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
- "2023-12-12 21:55:41.501573: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
- "2023-12-12 21:55:41.501593: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
- "2023-12-12 21:55:41.501613: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
- "2023-12-12 21:55:41.506678: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
- "2023-12-12 21:55:41.507421: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+ "2023-12-21 21:33:05.855788: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+ "2023-12-21 21:33:05.857870: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
+ "2023-12-21 21:33:05.883126: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+ "2023-12-21 21:33:05.883147: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+ "2023-12-21 21:33:05.883167: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+ "2023-12-21 21:33:05.888388: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
+ "2023-12-21 21:33:05.889023: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
- "2023-12-12 21:55:42.037050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
+ "2023-12-21 21:33:06.449452: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
]
}
],
@@ -292,7 +292,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "671a0e89103d496eb4cdce5f6c70904a",
+ "model_id": "c802a1fb556c4abdb38b967c02ef3ef6",
"version_major": 2,
"version_minor": 0
},
@@ -306,7 +306,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "f6376de7c87145f2a00de1c3e6edf4c6",
+ "model_id": "43b1bd84b5ef4fb0b015411fa3edc862",
"version_major": 2,
"version_minor": 0
},
@@ -320,7 +320,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "1126a5495fb5426588da00e518586bde",
+ "model_id": "ec15e0c8aaa54fc080d9d8d8938c233a",
"version_major": 2,
"version_minor": 0
},
@@ -593,7 +593,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 9,
"id": "837b4a3b-ccc3-4004-9577-2b2c7b802dea",
"metadata": {
"tags": []
@@ -602,15 +602,15 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "4af32a190c7a4896a06743fe05c7b56b",
+ "model_id": "54ae70217dbd4299974e24aae599957e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Dropdown(description='Device:', options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='CPU')"
+ "Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU')"
]
},
- "execution_count": 10,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -637,7 +637,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 10,
"id": "5333ab9b-ff5d-4a7f-bcdc-9cca5d56dc0a",
"metadata": {
"tags": []
@@ -659,7 +659,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 11,
"id": "3536a1a7",
"metadata": {
"collapsed": false,
@@ -671,15 +671,15 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "0f954bac863d48f5ab0b9eb779f0a82d",
+ "model_id": "03dc64b5e12e4fb79fb36a63ffef2ef2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Dropdown(description='Model to run:', options=('INT4', 'FP16'), value='INT4')"
+ "Dropdown(description='Model to run:', options=('FP16',), value='FP16')"
]
},
- "execution_count": 12,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -705,7 +705,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 12,
"id": "7a041101-7336-40fd-96c9-cd298015a0f3",
"metadata": {
"tags": []
@@ -715,7 +715,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Loading model from chatglm2-6b/INT4_compressed_weights\n"
+ "Loading model from chatglm3-6b/FP16\n"
]
},
{
@@ -760,7 +760,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 13,
"id": "8f6f7596-5677-4931-875b-aaabfa23cabc",
"metadata": {},
"outputs": [
@@ -768,7 +768,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/home/ea/work/openvino_notebooks/notebooks/254-llm-chatbot/ov_llm_model.py:400: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
+ "/home/ethan/intel/openvino_notebooks/notebooks/254-llm-chatbot/ov_llm_model.py:400: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
" self.request.start_async(inputs, shared_memory=True)\n"
]
},
@@ -833,12 +833,50 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"id": "01f8f7f8-072e-45dc-b7c9-18d8c3c47754",
"metadata": {
"tags": []
},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Running on local URL: http://10.3.233.70:4768\n",
+ "\n",
+ "To create a public link, set `share=True` in `launch()`.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": []
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/ethan/intel/openvino_notebooks/notebooks/254-llm-chatbot/ov_llm_model.py:400: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
+ " self.request.start_async(inputs, shared_memory=True)\n"
+ ]
+ }
+ ],
"source": [
"from threading import Event, Thread\n",
"from uuid import uuid4\n",
@@ -922,7 +960,7 @@
"\n",
"def default_partial_text_processor(partial_text: str, new_text: str):\n",
" \"\"\"\n",
- " helper for updating partially generated answer, used by de\n",
+ " helper for updating partially generated answer, used by default\n",
"\n",
" Params:\n",
" partial_text: text buffer for storing previosly generated text\n",
@@ -972,7 +1010,7 @@
" return text\n",
"\n",
"\n",
- "def user(text, history):\n",
+ "def user(message, history):\n",
" \"\"\"\n",
" callback function for updating user messages in interface on submit button click\n",
"\n",
@@ -983,8 +1021,7 @@
" None\n",
" \"\"\"\n",
" # Append the user's message to the conversation history\n",
- " history = history + [(text, None)]\n",
- " return \"\", history\n",
+ " return \"\", history + [[message, \"\"]]\n",
"\n",
"\n",
"def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id):\n",
@@ -1184,13 +1221,21 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"id": "7b837f9e-4152-4a5c-880a-ed874aa64a74",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Closing server running on port: 4768\n"
+ ]
+ }
+ ],
"source": [
"# please run this cell for stopping gradio interface\n",
- "# demo.close()"
+ "demo.close()"
]
}
],
diff --git a/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb b/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb
index 4628c583daf..f6d1def9712 100644
--- a/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb
+++ b/notebooks/254-llm-chatbot/254-rag-chatbot.ipynb
@@ -51,10 +51,24 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"id": "1f077b32-5d36-44b0-9041-407e996283a3",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[33mWARNING: Skipping openvino-dev as it is not installed.\u001b[0m\u001b[33m\n",
+ "\u001b[0m\u001b[33mWARNING: Skipping openvino as it is not installed.\u001b[0m\u001b[33m\n",
+ "\u001b[0mNote: you may need to restart the kernel to use updated packages.\n",
+ "\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.3.2\u001b[0m\n",
+ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
"source": [
"%pip uninstall -q -y openvino-dev openvino openvino-nightly\n",
"%pip install -q --extra-index-url https://download.pytorch.org/whl/cpu\\\n",
@@ -62,7 +76,7 @@
"\"nncf>=2.7\"\\\n",
"\"openvino-nightly\"\\\n",
"\"gradio\"\\\n",
- "\"onnx\" \"chromadb\" \"sentence_transformers\" \"langchain\" \"langchainhub\" \"transformers>=4.34.0\" \"unstructured\" \"scikit-learn\""
+ "\"onnx\" \"chromadb\" \"sentence_transformers\" \"langchain\" \"langchainhub\" \"transformers>=4.34.0\" \"unstructured\" \"scikit-learn\" \"python-docx\""
]
},
{
@@ -112,7 +126,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 1,
"id": "d3b57cfb-e727-43a5-b2c9-8f1b1ba72061",
"metadata": {},
"outputs": [
@@ -127,15 +141,15 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "2023-12-12 22:01:30.686211: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
- "2023-12-12 22:01:30.688149: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
- "2023-12-12 22:01:30.712502: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
- "2023-12-12 22:01:30.712522: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
- "2023-12-12 22:01:30.712540: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
- "2023-12-12 22:01:30.717373: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
- "2023-12-12 22:01:30.718189: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
+ "2023-12-24 07:00:22.754016: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
+ "2023-12-24 07:00:22.756105: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
+ "2023-12-24 07:00:22.781727: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
+ "2023-12-24 07:00:22.781748: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
+ "2023-12-24 07:00:22.781768: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
+ "2023-12-24 07:00:22.787005: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
+ "2023-12-24 07:00:22.787908: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
- "2023-12-12 22:01:31.258810: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
+ "2023-12-24 07:00:23.453118: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
]
}
],
@@ -176,14 +190,14 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 2,
"id": "37bf49d7",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "4241d5e2520f4647bc6728dd80c90927",
+ "model_id": "84b3d28a335c4479b31f1ea88c95c257",
"version_major": 2,
"version_minor": 0
},
@@ -191,7 +205,7 @@
"Dropdown(description='LLM Model:', options=('tiny-llama-1b-chat', 'red-pajama-3b-chat', 'llama-2-chat-7b', 'mp…"
]
},
- "execution_count": 4,
+ "execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
@@ -213,7 +227,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 3,
"id": "49ea95f8",
"metadata": {},
"outputs": [
@@ -221,7 +235,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Selected LLM model chatglm3-6b\n"
+ "Selected LLM model zephyr-7b-beta\n"
]
}
],
@@ -288,14 +302,14 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 4,
"id": "c6a38153",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "329f80b26f2c496fbdc5b6438a6d405a",
+ "model_id": "4d30072191ce48dab051838b66af7eb1",
"version_major": 2,
"version_minor": 0
},
@@ -309,7 +323,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "b7c42390041c4cebb40808aed3df1de3",
+ "model_id": "aa20a85b31ba4c03a09eedaa7bcb9917",
"version_major": 2,
"version_minor": 0
},
@@ -323,7 +337,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "6ae4adffa07d4888a88eeb686b0a1229",
+ "model_id": "354c2cf647db495fb9034b3ab1c756da",
"version_major": 2,
"version_minor": 0
},
@@ -361,7 +375,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 5,
"id": "2020d522",
"metadata": {},
"outputs": [],
@@ -537,7 +551,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 6,
"id": "8e127215",
"metadata": {},
"outputs": [
@@ -545,7 +559,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Size of FP16 model is 11909.69 MB\n"
+ "Size of FP16 model is 27657.02 MB\n",
+ "Size of model with INT4 compressed weights is 5053.39 MB\n",
+ "Compression rate for INT4 model: 5.473\n"
]
}
],
@@ -580,22 +596,22 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": 7,
"id": "ff80e6eb-7923-40ef-93d8-5e6c56e50667",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "f2a58d5b28be4b5284745d65ff60540f",
+ "model_id": "27d89272c4f84c9fb06ec783816e34f5",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Dropdown(description='Embedding Model:', options=('all-mpnet-base-v2', 'text2vec-large-chinese'), value='all-m…"
+ "Dropdown(description='Embedding Model:', options=('all-mpnet-base-v2',), value='all-mpnet-base-v2')"
]
},
- "execution_count": 10,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -618,7 +634,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 8,
"id": "790afcf8",
"metadata": {},
"outputs": [
@@ -626,7 +642,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Selected text2vec-large-chinese model\n"
+ "Selected all-mpnet-base-v2 model\n"
]
}
],
@@ -637,7 +653,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 9,
"id": "58d75dad-2eeb-4edd-8d12-d77a365f8eda",
"metadata": {
"scrolled": true
@@ -670,14 +686,14 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 10,
"id": "e11e73cf",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "fca81d3e662d4bb7ad99929de50814bd",
+ "model_id": "27966ce0a9304402975dbe2c86f76e87",
"version_major": 2,
"version_minor": 0
},
@@ -685,7 +701,7 @@
"Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU')"
]
},
- "execution_count": 13,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -704,10 +720,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"id": "9ab29b85",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Embedding model will be loaded to CPU device for response generation\n"
+ ]
+ }
+ ],
"source": [
"print(f\"Embedding model will be loaded to {embedding_device.value} device for response generation\")"
]
@@ -723,10 +747,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"id": "6d044d01",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "12be7dcd8325436f9467b0474d9d38ac",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU')"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"llm_device = widgets.Dropdown(\n",
" options=core.available_devices + [\"AUTO\"],\n",
@@ -740,10 +780,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"id": "348b90fe",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "LLM model will be loaded to CPU device for response generation\n"
+ ]
+ }
+ ],
"source": [
"print(f\"LLM model will be loaded to {llm_device.value} device for response generation\")"
]
@@ -770,7 +818,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 14,
"id": "df3e8fd1-d4c1-4e33-b46e-7840e392f8ee",
"metadata": {},
"outputs": [
@@ -817,7 +865,7 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": 15,
"id": "efe29701",
"metadata": {},
"outputs": [],
@@ -827,22 +875,22 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": 16,
"id": "8b014f24-aa5b-4d40-924d-d579ad7fcec6",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "30e4be3151294400b6595e83f69a04ec",
+ "model_id": "7b3c2991f3284deea331b5806a336d77",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
- "Dropdown(description='Model to run:', options=('FP16',), value='FP16')"
+ "Dropdown(description='Model to run:', options=('INT4', 'FP16'), value='INT4')"
]
},
- "execution_count": 17,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -868,24 +916,30 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 17,
"id": "f7f708db-8de1-4efd-94b2-fcabc48d52f4",
"metadata": {},
"outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.\n",
+ "The argument `trust_remote_code` is to be used along with export=True. It will be ignored.\n"
+ ]
+ },
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Loading model from chatglm3-6b/FP16\n"
+ "Loading model from zephyr-7b-beta/INT4_compressed_weights\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
- "The argument `trust_remote_code` is to be used along with export=True. It will be ignored.\n",
- "Compiling the model to CPU ...\n",
- "Setting OpenVINO CACHE_DIR to chatglm3-6b/FP16/model_cache\n"
+ "Compiling the model to CPU ...\n"
]
}
],
@@ -949,7 +1003,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"id": "6040e0d0",
"metadata": {},
"outputs": [],
@@ -962,6 +1016,11 @@
" tokenizer=tok,\n",
" max_new_tokens=256,\n",
" streamer=streamer,\n",
+ " # temperature=1,\n",
+ " # do_sample=True,\n",
+ " # top_p=0.8,\n",
+ " # top_k=20,\n",
+ " # repetition_penalty=1.1,\n",
")\n",
"if stop_tokens is not None:\n",
" generate_kwargs[\"stopping_criteria\"] = StoppingCriteriaList(stop_tokens)\n",
@@ -1005,6 +1064,72 @@
{
"cell_type": "code",
"execution_count": 19,
+ "id": "5b97eeeb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from typing import List\n",
+ "from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, MarkdownTextSplitter\n",
+ "from langchain.document_loaders import (\n",
+ " CSVLoader,\n",
+ " EverNoteLoader,\n",
+ " PDFMinerLoader,\n",
+ " TextLoader,\n",
+ " UnstructuredEPubLoader,\n",
+ " UnstructuredHTMLLoader,\n",
+ " UnstructuredMarkdownLoader,\n",
+ " UnstructuredODTLoader,\n",
+ " UnstructuredPowerPointLoader,\n",
+ " UnstructuredWordDocumentLoader, )\n",
+ "\n",
+ "\n",
+ "class ChineseTextSplitter(CharacterTextSplitter):\n",
+ " def __init__(self, pdf: bool = False, **kwargs):\n",
+ " super().__init__(**kwargs)\n",
+ " self.pdf = pdf\n",
+ "\n",
+ " def split_text(self, text: str) -> List[str]:\n",
+ " if self.pdf:\n",
+ " text = re.sub(r\"\\n{3,}\", \"\\n\", text)\n",
+ " text = text.replace(\"\\n\\n\", \"\")\n",
+ " sent_sep_pattern = re.compile(\n",
+ " '([﹒﹔﹖﹗.。!?][\"’”」』]{0,2}|(?=[\"‘“「『]{1,2}|$))')\n",
+ " sent_list = []\n",
+ " for ele in sent_sep_pattern.split(text):\n",
+ " if sent_sep_pattern.match(ele) and sent_list:\n",
+ " sent_list[-1] += ele\n",
+ " elif ele:\n",
+ " sent_list.append(ele)\n",
+ " return sent_list\n",
+ "\n",
+ "\n",
+ "TEXT_SPLITERS = {\n",
+ " \"Character\": CharacterTextSplitter,\n",
+ " \"RecursiveCharacter\": RecursiveCharacterTextSplitter,\n",
+ " \"Markdown\": MarkdownTextSplitter,\n",
+ " \"Chinese\": ChineseTextSplitter,\n",
+ "}\n",
+ "\n",
+ "\n",
+ "LOADERS = {\n",
+ " \".csv\": (CSVLoader, {}),\n",
+ " \".doc\": (UnstructuredWordDocumentLoader, {}),\n",
+ " \".docx\": (UnstructuredWordDocumentLoader, {}),\n",
+ " \".enex\": (EverNoteLoader, {}),\n",
+ " \".epub\": (UnstructuredEPubLoader, {}),\n",
+ " \".html\": (UnstructuredHTMLLoader, {}),\n",
+ " \".md\": (UnstructuredMarkdownLoader, {}),\n",
+ " \".odt\": (UnstructuredODTLoader, {}),\n",
+ " \".pdf\": (PDFMinerLoader, {}),\n",
+ " \".ppt\": (UnstructuredPowerPointLoader, {}),\n",
+ " \".pptx\": (UnstructuredPowerPointLoader, {}),\n",
+ " \".txt\": (TextLoader, {\"encoding\": \"utf8\"}),\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
"id": "0908e5e9-4dcb-4fc8-8480-3cf70fd5e934",
"metadata": {},
"outputs": [
@@ -1012,7 +1137,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Running on local URL: http://10.3.233.70:7868\n",
+ "Running on local URL: http://10.3.233.70:4888\n",
"\n",
"To create a public link, set `share=True` in `launch()`.\n"
]
@@ -1020,7 +1145,7 @@
{
"data": {
"text/html": [
- ""
+ ""
],
"text/plain": [
""
@@ -1033,7 +1158,7 @@
"data": {
"text/plain": []
},
- "execution_count": 19,
+ "execution_count": 27,
"metadata": {},
"output_type": "execute_result"
},
@@ -1041,9 +1166,11 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/home/ethan/intel/openvino_notebooks/notebooks/254-llm-chatbot/ov_llm_model.py:400: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
+ "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.10/site-packages/optimum/intel/openvino/modeling_decoder.py:388: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
" self.request.start_async(inputs, shared_memory=True)\n",
- "/home/ethan/intel/openvino_notebooks/notebooks/254-llm-chatbot/ov_llm_model.py:400: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
+ "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.10/site-packages/optimum/intel/openvino/modeling_decoder.py:388: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
+ " self.request.start_async(inputs, shared_memory=True)\n",
+ "/home/ethan/intel/openvino_notebooks/openvino_env/lib/python3.10/site-packages/optimum/intel/openvino/modeling_decoder.py:388: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
" self.request.start_async(inputs, shared_memory=True)\n"
]
}
@@ -1051,19 +1178,36 @@
"source": [
"from langchain.prompts import PromptTemplate\n",
"from langchain.vectorstores import Chroma\n",
- "from langchain.text_splitter import CharacterTextSplitter\n",
"from langchain.chains import RetrievalQA\n",
- "from langchain.document_loaders import UnstructuredMarkdownLoader, CSVLoader, TextLoader\n",
- "from typing import List\n",
+ "from langchain.docstore.document import Document\n",
"from threading import Event, Thread\n",
"import gradio as gr\n",
"import re\n",
"from uuid import uuid4\n",
"\n",
"\n",
+ "def load_single_document(file_path: str) -> List[Document]:\n",
+ " \"\"\"\n",
+ " helper for loading a single document\n",
+ "\n",
+ " Params:\n",
+ " file_path: document path\n",
+ " Returns:\n",
+ " documents loaded\n",
+ "\n",
+ " \"\"\"\n",
+ " ext = \".\" + file_path.rsplit(\".\", 1)[-1]\n",
+ " if ext in LOADERS:\n",
+ " loader_class, loader_args = LOADERS[ext]\n",
+ " loader = loader_class(file_path, **loader_args)\n",
+ " return loader.load()\n",
+ "\n",
+ " raise ValueError(f\"File does not exist '{ext}'\")\n",
+ "\n",
+ "\n",
"def default_partial_text_processor(partial_text: str, new_text: str):\n",
" \"\"\"\n",
- " helper for updating partially generated answer, used by de\n",
+ " helper for updating partially generated answer, used by default\n",
"\n",
" Params:\n",
" partial_text: text buffer for storing previosly generated text\n",
@@ -1081,22 +1225,7 @@
")\n",
"\n",
"\n",
- "class ChineseTextSplitter(CharacterTextSplitter):\n",
- " def __init__(self, pdf: bool = False, **kwargs):\n",
- " super().__init__(**kwargs)\n",
- "\n",
- " def split_text(self, text: str) -> List[str]:\n",
- " sent_sep_pattern = re.compile('([﹒﹔﹖﹗.。!?][\"’”」』]{0,2}|(?=[\"‘“「『]{1,2}|$))')\n",
- " sent_list = []\n",
- " for ele in sent_sep_pattern.split(text):\n",
- " if sent_sep_pattern.match(ele) and sent_list:\n",
- " sent_list[-1] += ele\n",
- " elif ele:\n",
- " sent_list.append(ele)\n",
- " return sent_list\n",
- "\n",
- "\n",
- "def build_chain(doc, chunk_size, chunk_overlap, vector_search_top_k):\n",
+ "def build_chain(docs, spliter_name, chunk_size, chunk_overlap, vector_search_top_k):\n",
" \"\"\"\n",
" Initialize a QA chain\n",
"\n",
@@ -1107,24 +1236,13 @@
" vector_search_top_k: Vector search top k\n",
"\n",
" \"\"\"\n",
- " \n",
- " if doc.name.lower().endswith(\".md\"):\n",
- " loader = UnstructuredMarkdownLoader(doc.name)\n",
- " elif doc.name.lower().endswith(\".csv\"):\n",
- " loader = CSVLoader(doc.name)\n",
- " else:\n",
- " loader = TextLoader(doc.name)\n",
- " \n",
- " documents = loader.load()\n",
+ " documents = []\n",
+ " for doc in docs:\n",
+ " documents.extend(load_single_document(doc.name))\n",
"\n",
- " if \"qwen\" in llm_model_id.value or \"chatglm\" in llm_model_id.value:\n",
- " text_splitter = ChineseTextSplitter(\n",
- " chunk_size=chunk_size, chunk_overlap=chunk_overlap\n",
- " )\n",
- " else:\n",
- " text_splitter = CharacterTextSplitter(\n",
- " chunk_size=chunk_size, chunk_overlap=chunk_overlap\n",
- " )\n",
+ " text_splitter = TEXT_SPLITERS[spliter_name](\n",
+ " chunk_size=chunk_size, chunk_overlap=chunk_overlap\n",
+ " )\n",
"\n",
" texts = text_splitter.split_documents(documents)\n",
"\n",
@@ -1155,7 +1273,7 @@
" None\n",
" \"\"\"\n",
" # Append the user's message to the conversation history\n",
- " return \"\", history + [(message, None)]\n",
+ " return \"\", history + [[message, \"\"]]\n",
"\n",
"\n",
"def bot(history, conversation_id):\n",
@@ -1163,12 +1281,7 @@
" callback function for running chatbot on submit button click\n",
"\n",
" Params:\n",
- " history: conversation history\n",
- " temperature: parameter for control the level of creativity in AI-generated text.\n",
- " By adjusting the `temperature`, you can influence the AI model's probability distribution, making the text more focused or diverse.\n",
- " top_p: parameter for control the range of tokens considered by the AI model based on their cumulative probability.\n",
- " top_k: parameter for control the range of tokens considered by the AI model based on their cumulative probability, selecting number of tokens with highest probability.\n",
- " repetition_penalty: parameter for penalizing tokens based on how frequently they occur in the text.\n",
+ " history: conversation history.\n",
" conversation_id: unique conversation identifier.\n",
"\n",
" \"\"\"\n",
@@ -1180,6 +1293,8 @@
"\n",
" t1 = Thread(target=infer, args=(history[-1][0],))\n",
" t1.start()\n",
+ "\n",
+ " # Initialize an empty string to store the generated text\n",
" partial_text = \"\"\n",
" for new_text in streamer:\n",
" partial_text = text_processor(partial_text, new_text)\n",
@@ -1203,10 +1318,35 @@
" gr.Markdown(f\"\"\"Powered by OpenVINO and {llm_model_id.value} \"\"\")\n",
" with gr.Row():\n",
" with gr.Column(scale=1):\n",
- " docs = gr.File(label=\"Load a Markdown/CSV file\", file_types=[\".md\", \".csv\"])\n",
+ " docs = gr.File(\n",
+ " label=\"Load text files\",\n",
+ " file_count=\"multiple\",\n",
+ " file_types=[\n",
+ " \".csv\",\n",
+ " \".doc\",\n",
+ " \".docx\",\n",
+ " \".enex\",\n",
+ " \".epub\",\n",
+ " \".html\",\n",
+ " \".md\",\n",
+ " \".odt\",\n",
+ " \".pdf\",\n",
+ " \".ppt\",\n",
+ " \".pptx\",\n",
+ " \".txt\",\n",
+ " ],\n",
+ " )\n",
" load_docs = gr.Button(\"Build Retriever\")\n",
- " retriever_argument = gr.Accordion(\"Retriever Configuration\")\n",
+ " retriever_argument = gr.Accordion(\"Retriever Configuration\", open=False)\n",
" with retriever_argument:\n",
+ " spliter = gr.Dropdown(\n",
+ " [\"Character\", \"RecursiveCharacter\", \"Markdown\", \"Chinese\"],\n",
+ " value=\"RecursiveCharacter\",\n",
+ " label=\"Text Spliter\",\n",
+ " info=\"Method used to splite the documents\",\n",
+ " multiselect=False,\n",
+ " )\n",
+ "\n",
" chunk_size = gr.Slider(\n",
" label=\"Chunk size\",\n",
" value=1000,\n",
@@ -1251,27 +1391,19 @@
" with gr.Column():\n",
" with gr.Row():\n",
" submit = gr.Button(\"Submit\")\n",
- " stop = gr.Button(\"Stop\")\n",
" clear = gr.Button(\"Clear\")\n",
" load_docs.click(\n",
" build_chain,\n",
- " inputs=[docs, chunk_size, chunk_overlap, vector_search_top_k],\n",
+ " inputs=[docs, spliter, chunk_size, chunk_overlap, vector_search_top_k],\n",
" outputs=[langchain_status],\n",
" queue=False,\n",
" )\n",
- " submit_event = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(\n",
- " bot, [chatbot, conversation_id], chatbot, queue=True\n",
- " )\n",
+ " submit_event = msg.submit(\n",
+ " user, [msg, chatbot], [msg, chatbot], queue=False, trigger_mode=\"once\"\n",
+ " ).then(bot, [chatbot, conversation_id], chatbot, queue=True)\n",
" submit_click_event = submit.click(\n",
- " user, [msg, chatbot], [msg, chatbot], queue=False\n",
+ " user, [msg, chatbot], [msg, chatbot], queue=False, trigger_mode=\"once\"\n",
" ).then(bot, [chatbot, conversation_id], chatbot, queue=True)\n",
- " stop.click(\n",
- " fn=None,\n",
- " inputs=None,\n",
- " outputs=None,\n",
- " cancels=[submit_event, submit_click_event],\n",
- " queue=False,\n",
- " )\n",
" clear.click(lambda: None, None, chatbot, queue=False)\n",
"\n",
"demo.queue(max_size=2)\n",
@@ -1285,7 +1417,7 @@
},
{
"cell_type": "code",
- "execution_count": 43,
+ "execution_count": 28,
"id": "6f4b5a84-bebf-49b9-b2fa-5e788ed2cbac",
"metadata": {},
"outputs": [
@@ -1293,7 +1425,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "Closing server running on port: 7868\n"
+ "Closing server running on port: 4888\n"
]
}
],
diff --git a/notebooks/254-llm-chatbot/config.py b/notebooks/254-llm-chatbot/config.py
index 5543bfbc9a9..cb0ecb71a83 100644
--- a/notebooks/254-llm-chatbot/config.py
+++ b/notebooks/254-llm-chatbot/config.py
@@ -127,19 +127,18 @@ def youri_partial_text_processor(partial_text, new_text):
"chatglm3-6b": {
"model_id": "THUDM/chatglm3-6b",
"remote": True,
- "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT }\n",
- "history_template": "<|user|>\n{user}\n<|assistant|>\n{assistant}\n",
+ "start_message": f"<|system|>\n{DEFAULT_SYSTEM_PROMPT}\n",
+ "history_template": "<|user|>\n{user} \n<|assistant|>\n{assistant} \n",
"partial_text_processor": chatglm_partial_text_processor,
- "current_message_template": "<|user|>\n{user}\n<|assistant|>\n",
+ "current_message_template": "<|user|>\n{user} \n<|assistant|>\n{assistant}",
"tokenizer_kwargs": {"add_special_tokens": False},
- "stop_tokens": ["", "[MASK]", "[gMASK]", "[sMASK]", "sop", "eop"],
- "prompt_template": f"""<|system|>
- {DEFAULT_RAG_PROMPT_CHINESE }"""
+ "stop_tokens": [""],
+ "prompt_template": f"""<|system|> {DEFAULT_RAG_PROMPT_CHINESE }"""
+ """
<|user|>
问题: {question}
已知内容: {context}
- 回答:
+ 回答:
<|assistant|>""",
},
"mistral-7b": {