Skip to content

Commit

Permalink
[254-rag-chatbot] update the text splitter and document loader (#1562)
Browse files Browse the repository at this point in the history
* to support more text splitters and data loaders

to support more text splitters and data loaders

to support more text splitters and data loaders

to support more text splitters and data loaders

to support more text splitters and data loaders

to support more text splitters and data loaders

* fix qwen prompt template issue

fix qwen prompt template issue

fix qwen prompt template issue

* support multiple documents upload

support multiple documents upload
  • Loading branch information
openvino-dev-samples authored Dec 25, 2023
1 parent c87dbed commit c6a92b2
Show file tree
Hide file tree
Showing 3 changed files with 335 additions and 159 deletions.
113 changes: 79 additions & 34 deletions notebooks/254-llm-chatbot/254-llm-chatbot.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "169978da6d5942bd8146676f3bf5db8b",
"model_id": "1e8ca46ac6734f8c816a14cbe46964ce",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -223,15 +223,15 @@
"name": "stderr",
"output_type": "stream",
"text": [
"2023-12-12 21:55:41.474562: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-12-12 21:55:41.476575: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
"2023-12-12 21:55:41.501573: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
"2023-12-12 21:55:41.501593: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
"2023-12-12 21:55:41.501613: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2023-12-12 21:55:41.506678: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
"2023-12-12 21:55:41.507421: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"2023-12-21 21:33:05.855788: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
"2023-12-21 21:33:05.857870: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
"2023-12-21 21:33:05.883126: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
"2023-12-21 21:33:05.883147: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
"2023-12-21 21:33:05.883167: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"2023-12-21 21:33:05.888388: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n",
"2023-12-21 21:33:05.889023: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-12-12 21:55:42.037050: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
"2023-12-21 21:33:06.449452: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
]
}
],
Expand Down Expand Up @@ -292,7 +292,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "671a0e89103d496eb4cdce5f6c70904a",
"model_id": "c802a1fb556c4abdb38b967c02ef3ef6",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -306,7 +306,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f6376de7c87145f2a00de1c3e6edf4c6",
"model_id": "43b1bd84b5ef4fb0b015411fa3edc862",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -320,7 +320,7 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "1126a5495fb5426588da00e518586bde",
"model_id": "ec15e0c8aaa54fc080d9d8d8938c233a",
"version_major": 2,
"version_minor": 0
},
Expand Down Expand Up @@ -593,7 +593,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"id": "837b4a3b-ccc3-4004-9577-2b2c7b802dea",
"metadata": {
"tags": []
Expand All @@ -602,15 +602,15 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "4af32a190c7a4896a06743fe05c7b56b",
"model_id": "54ae70217dbd4299974e24aae599957e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Dropdown(description='Device:', options=('CPU', 'GPU.0', 'GPU.1', 'AUTO'), value='CPU')"
"Dropdown(description='Device:', options=('CPU', 'GPU', 'AUTO'), value='CPU')"
]
},
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -637,7 +637,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"id": "5333ab9b-ff5d-4a7f-bcdc-9cca5d56dc0a",
"metadata": {
"tags": []
Expand All @@ -659,7 +659,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 11,
"id": "3536a1a7",
"metadata": {
"collapsed": false,
Expand All @@ -671,15 +671,15 @@
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0f954bac863d48f5ab0b9eb779f0a82d",
"model_id": "03dc64b5e12e4fb79fb36a63ffef2ef2",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Dropdown(description='Model to run:', options=('INT4', 'FP16'), value='INT4')"
"Dropdown(description='Model to run:', options=('FP16',), value='FP16')"
]
},
"execution_count": 12,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -705,7 +705,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 12,
"id": "7a041101-7336-40fd-96c9-cd298015a0f3",
"metadata": {
"tags": []
Expand All @@ -715,7 +715,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model from chatglm2-6b/INT4_compressed_weights\n"
"Loading model from chatglm3-6b/FP16\n"
]
},
{
Expand Down Expand Up @@ -760,15 +760,15 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 13,
"id": "8f6f7596-5677-4931-875b-aaabfa23cabc",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ea/work/openvino_notebooks/notebooks/254-llm-chatbot/ov_llm_model.py:400: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
"/home/ethan/intel/openvino_notebooks/notebooks/254-llm-chatbot/ov_llm_model.py:400: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
" self.request.start_async(inputs, shared_memory=True)\n"
]
},
Expand Down Expand Up @@ -833,12 +833,50 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "01f8f7f8-072e-45dc-b7c9-18d8c3c47754",
"metadata": {
"tags": []
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Running on local URL: http://10.3.233.70:4768\n",
"\n",
"To create a public link, set `share=True` in `launch()`.\n"
]
},
{
"data": {
"text/html": [
"<div><iframe src=\"http://10.3.233.70:4768/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": []
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/ethan/intel/openvino_notebooks/notebooks/254-llm-chatbot/ov_llm_model.py:400: FutureWarning: `shared_memory` is deprecated and will be removed in 2024.0. Value of `shared_memory` is going to override `share_inputs` value. Please use only `share_inputs` explicitly.\n",
" self.request.start_async(inputs, shared_memory=True)\n"
]
}
],
"source": [
"from threading import Event, Thread\n",
"from uuid import uuid4\n",
Expand Down Expand Up @@ -922,7 +960,7 @@
"\n",
"def default_partial_text_processor(partial_text: str, new_text: str):\n",
" \"\"\"\n",
" helper for updating partially generated answer, used by de\n",
" helper for updating partially generated answer, used by default\n",
"\n",
" Params:\n",
" partial_text: text buffer for storing previosly generated text\n",
Expand Down Expand Up @@ -972,7 +1010,7 @@
" return text\n",
"\n",
"\n",
"def user(text, history):\n",
"def user(message, history):\n",
" \"\"\"\n",
" callback function for updating user messages in interface on submit button click\n",
"\n",
Expand All @@ -983,8 +1021,7 @@
" None\n",
" \"\"\"\n",
" # Append the user's message to the conversation history\n",
" history = history + [(text, None)]\n",
" return \"\", history\n",
" return \"\", history + [[message, \"\"]]\n",
"\n",
"\n",
"def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id):\n",
Expand Down Expand Up @@ -1184,13 +1221,21 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "7b837f9e-4152-4a5c-880a-ed874aa64a74",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Closing server running on port: 4768\n"
]
}
],
"source": [
"# please run this cell for stopping gradio interface\n",
"# demo.close()"
"demo.close()"
]
}
],
Expand Down
Loading

0 comments on commit c6a92b2

Please sign in to comment.