
Updates to docling RAG recipe
Signed-off-by: BJ Hargrave <[email protected]>
bjhargrave committed Jan 14, 2025
1 parent 0d1adc2 commit 01aafb0
Showing 1 changed file with 34 additions and 45 deletions.
recipes/RAG/Granite_Docling_RAG.ipynb
@@ -9,7 +9,8 @@
 }
 },
 "source": [
-"#Building an AI-Powered Document Retrieval System with Docling and Granite 3.1\n",
+"# Building an AI-Powered Document Retrieval System with Docling and Granite 3.1\n",
+"\n",
 "*Using IBM Granite Models*"
 ]
 },
@@ -29,8 +30,8 @@
 "\n",
 "This workshop leverages three cutting-edge technologies:\n",
 "\n",
-"1. **[Docling](https://github.com/DS4SD/docling):** An IBM open-source toolkit for parsing and converting documents.\n",
-"2. **[Granite™ 3.1 8B](https://www.ibm.com/granite):** A state-of-the-art LLM available via an [API](https://www.ibm.com/topics/api) through Replicate, providing robust natural language capabilities.\n",
+"1. **[Docling](https://github.com/DS4SD/docling):** An open-source toolkit for parsing and converting documents.\n",
+"2. **[Granite™ 3.1](https://www.ibm.com/granite/docs/models/granite/):** A state-of-the-art LLM available via an [API](https://www.ibm.com/topics/api) through Replicate, providing robust natural language capabilities.\n",
 "3. **[LangChain](https://github.com/langchain-ai/langchain):** A powerful framework for building applications powered by language models, designed to simplify complex workflows and integrate external tools seamlessly.\n",
 "\n",
 "By the end of this recipe, you will:\n",
@@ -121,11 +122,11 @@
 "source": [
 "! pip install \"git+https://github.com/ibm-granite-community/utils.git\" \\\n",
 "    transformers \\\n",
-"    langchain-community \\\n",
-"    langchain-huggingface \\\n",
-"    langchain-milvus \\\n",
+"    langchain_community \\\n",
+"    langchain_huggingface \\\n",
+"    langchain_milvus \\\n",
 "    docling \\\n",
-"    replicate torch tiktoken"
+"    replicate"
 ]
 },
 {
@@ -165,10 +166,12 @@
 },
 "outputs": [],
 "source": [
-"from langchain.embeddings import HuggingFaceEmbeddings\n",
+"from langchain_huggingface import HuggingFaceEmbeddings\n",
 "from transformers import AutoTokenizer\n",
 "\n",
-"embeddings_model = HuggingFaceEmbeddings(model_name='ibm-granite/granite-embedding-30m-english')\n",
-"tokenizer = AutoTokenizer.from_pretrained(\"ibm-granite/granite-3.1-8b-instruct\")\n",
+"\n",
+"embeddings_model = HuggingFaceEmbeddings(model_name=\"ibm-granite/granite-embedding-30m-english\")\n",
+"embeddings_tokenizer = AutoTokenizer.from_pretrained(\"ibm-granite/granite-embedding-30m-english\")"
 ]
 },
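A quick way to sanity-check the new embedding setup (a sketch for illustration only; the sample sentence is arbitrary and not part of the notebook):

```python
from langchain_huggingface import HuggingFaceEmbeddings
from transformers import AutoTokenizer

embeddings_model = HuggingFaceEmbeddings(model_name="ibm-granite/granite-embedding-30m-english")
embeddings_tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-embedding-30m-english")

# Embed an arbitrary sentence and report the vector size and token count.
sample = "Granite embedding models map text into dense vectors."
vector = embeddings_model.embed_query(sample)
tokens = embeddings_tokenizer.tokenize(sample)
print(f"{len(vector)}-dimensional embedding computed from {len(tokens)} tokens")
```

Note that the cell now loads the embedding model's own tokenizer rather than the Granite LLM's, so chunk-size accounting matches the model that actually embeds the text.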
@@ -178,9 +181,13 @@
 "id": "ma8eWR10_JRJ"
 },
 "source": [
-"### Use the Granite 3.1 8b model\n",
+"### Use the Granite 3.1 model\n",
 "\n",
-"In order for us to use the Granite 3.1 model without running it locally, we can run it through a Replicate API. The instructions to set this up are [here](https://ibm.github.io/granite-workshop/pre-work/#replicate-ai-cloud-platform-for-colab). Once you have your API token, be sure to set the \"Value\" field to the token value, and the \"Name\" field should be \"REPLICATE_API_TOKEN\"."
+"Select a Granite model from the [`ibm-granite`](https://replicate.com/ibm-granite) org on Replicate. Here we use the Replicate LangChain client to connect to the model.\n",
+"\n",
+"To get set up with Replicate, see [Getting Started with Replicate](https://github.com/ibm-granite-community/granite-kitchen/blob/main/recipes/Getting_Started/Getting_Started_with_Replicate.ipynb).\n",
+"\n",
+"To connect to a model on a provider other than Replicate, substitute this code cell with one from the [LLM component recipe](https://github.com/ibm-granite-community/granite-kitchen/blob/main/recipes/Components/Langchain_LLMs.ipynb)."
 ]
 },
 {
@@ -191,17 +198,17 @@
 },
 "outputs": [],
 "source": [
-"from transformers import AutoModelForCausalLM\n",
-"import os\n",
-"import requests\n",
 "from langchain_community.llms import Replicate\n",
 "from ibm_granite_community.notebook_utils import get_env_var\n",
 "\n",
-"model = Replicate(model=\"ibm-granite/granite-3.1-8b-instruct\",\n",
-"                  replicate_api_token=get_env_var(\"REPLICATE_API_TOKEN\"))\n",
-"\n",
-"model_path = \"ibm-granite/granite-3.1-8b-instruct\"\n",
-"tokenizer = AutoTokenizer.from_pretrained(model_path)"
+"model = Replicate(\n",
+"    model=\"ibm-granite/granite-3.1-8b-instruct\",\n",
+"    replicate_api_token=get_env_var(\"REPLICATE_API_TOKEN\"),\n",
+"    model_kwargs={\n",
+"        \"max_tokens\": 1000, # Set the maximum number of tokens to generate as output.\n",
+"        \"min_tokens\": 100, # Set the minimum number of tokens to generate as output.\n",
+"    },\n",
+")"
 ]
 },
 {
@@ -225,17 +232,9 @@
 "prompt_guide_template = \"\"\"\\\n",
 "<|start_of_role|>user<|end_of_role|>{prompt}<|end_of_text|>\n",
 "<|start_of_role|>assistant<|end_of_role|>\"\"\"\n",
-"prompt = prompt_guide_template.format(prompt=f\"{query}\"\n",
-")\n",
+"prompt = prompt_guide_template.format(prompt=query)\n",
 "\n",
-"output = model.invoke(\n",
-"    prompt,\n",
-"    model_kwargs={\n",
-"        \"max_tokens\": 1000, # Set the maximum number of tokens to generate as output.\n",
-"        \"min_tokens\": 100, # Set the minimum number of tokens to generate as output.\n",
-"        \"system_prompt\": \"You are a helpful assistant.\",\n",
-"    }\n",
-")\n",
+"output = model.invoke(prompt)\n",
 "\n",
 "print(output)"
 ]
@@ -259,16 +258,8 @@
 "source": [
 "query1 = \"How much weight allowance is allowed in non championship fights in the UFC?\"\n",
 "\n",
-"prompt = prompt_guide_template.format(prompt=f\"{query1}\"\n",
-")\n",
-"output = model.invoke(\n",
-"    prompt,\n",
-"    model_kwargs={\n",
-"        \"max_tokens\": 1000, # Set the maximum number of tokens to generate as output.\n",
-"        \"min_tokens\": 100, # Set the minimum number of tokens to generate as output.\n",
-"        \"system_prompt\": \"You are a helpful assistant.\",\n",
-"    }\n",
-")\n",
+"prompt = prompt_guide_template.format(prompt=query1)\n",
+"output = model.invoke(prompt)\n",
 "\n",
 "print(output)"
 ]
@@ -376,10 +367,8 @@
 "outputs": [],
 "source": [
 "# Docling imports\n",
-"from docling.datamodel.base_models import InputFormat\n",
-"from docling.datamodel.pipeline_options import PdfPipelineOptions, TesseractCliOcrOptions\n",
-"from docling.document_converter import DocumentConverter, PdfFormatOption, WordFormatOption, SimplePipeline\n",
-"from docling.chunking import HybridChunker\n",
+"from docling.document_converter import DocumentConverter\n",
+"from docling_core.transforms.chunker.hybrid_chunker import HybridChunker\n",
 "from docling_core.types.doc.labels import DocItemLabel\n",
 "from langchain_core.documents import Document\n",
 "\n",
@@ -516,7 +505,7 @@
 "id": "CxVuFY_A_JRL"
 },
 "source": [
-"### Create the prompt for Granite 3.1 8b"
+"### Create the prompt for Granite 3.1"
 ]
 },
 {
@@ -581,7 +570,7 @@
 }
 },
 "source": [
-"Using the chunks from the similarity search as context, the response from Granite RAG 3.1 8b is recieved in JSON document. This cell then parses the JSON document to retrieve the sentences of the response along with metadata about the sentence which can be used to guide the displayed output."
+"Using the chunks from the similarity search as context, the response from Granite RAG 3.1 is received as a JSON document. This cell then parses the JSON document to retrieve the sentences of the response, along with metadata about each sentence that can be used to guide the displayed output."
 ]
 },
 {
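The parsing cell itself is collapsed in the diff. A minimal sketch of the approach it describes, assuming a hypothetical response shape (the "sentences", "text", and "meta" field names are placeholders; the real schema is defined by the Granite RAG output format):

```python
import json

# Hypothetical model output; the actual field names come from the Granite RAG
# response schema and may differ from this illustration.
output = '{"sentences": [{"text": "The weight allowance is one pound.", "meta": {"confidence": "high"}}]}'

try:
    response = json.loads(output)
    for sentence in response.get("sentences", []):
        text = sentence.get("text", "")
        meta = sentence.get("meta", {})
        # The metadata can guide how (or whether) each sentence is displayed.
        print(f"{text}  (meta: {meta})")
except json.JSONDecodeError:
    # If the model did not return valid JSON, fall back to the raw output.
    print(output)
```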
