merge get_results and order_results into one function
pablomarin committed Aug 21, 2023
1 parent 8fb90aa commit fe6a33f
Showing 9 changed files with 479 additions and 434 deletions.
85 changes: 58 additions & 27 deletions 03-Quering-AOpenAI.ipynb
@@ -62,12 +62,12 @@
"from langchain.chains.qa_with_sources import load_qa_with_sources_chain\n",
"from langchain.embeddings import OpenAIEmbeddings\n",
"\n",
"from common.prompts import COMBINE_QUESTION_PROMPT, COMBINE_PROMPT\n",
"from common.prompts import COMBINE_QUESTION_PROMPT, COMBINE_PROMPT, COMBINE_PROMPT_TEMPLATE\n",
"from common.utils import (\n",
" get_search_results,\n",
" order_search_results,\n",
" model_tokens_limit,\n",
" num_tokens_from_docs,\n",
" num_tokens_from_string\n",
")\n",
"\n",
"from dotenv import load_dotenv\n",
@@ -909,7 +909,8 @@
"outputs": [],
"source": [
"MODEL = \"gpt-35-turbo\" # options: gpt-35-turbo, gpt-35-turbo-16k, gpt-4, gpt-4-32k\n",
"llm = AzureChatOpenAI(deployment_name=MODEL, temperature=0, max_tokens=1000)"
"COMPLETION_TOKENS = 1000\n",
"llm = AzureChatOpenAI(deployment_name=MODEL, temperature=0, max_tokens=COMPLETION_TOKENS)"
]
},
{
@@ -1058,11 +1059,19 @@
"execution_count": 12,
"id": "3bccca45-d1dd-476f-b109-a528b857b6b3",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of results: 20\n"
]
}
],
"source": [
"k = 10 # Number of results per each text_index\n",
"agg_search_results = get_search_results(QUESTION, indexes, k=10)\n",
"ordered_results = order_search_results(agg_search_results, k=10*len(indexes), reranker_threshold=1)"
"ordered_results = get_search_results(QUESTION, indexes, k=10, reranker_threshold=1)\n",
"print(\"Number of results:\",len(ordered_results))"
]
},
{
@@ -1081,7 +1090,7 @@
"id": "da70e7a8-7536-4688-b30c-01ba28e9b9f8",
"metadata": {},
"source": [
"Now we can fill up the vector-based index as users lookup documents using the text-based index. This approach although it requires two searches per user query (one on the text-based indexes and the other one on the vector based indexes), it is simpler to implement and will be incrementatly faster as user use the system."
"Now we can fill up the vector-based index as users lookup documents using the text-based index. This approach although it requires two searches per user query (one on the text-based indexes and the other one on the vector-based indexes), it is simpler to implement and will be incrementatly faster as user use the system."
]
},
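
A minimal sketch of that lazy "vectorize on first read" loop. The notebook's real cell is collapsed in this diff; `upload_chunk_to_vector_index` is a placeholder name, not a function from `common/utils.py`:

```python
# Sketch only: embed each chunk returned by the text search and push it into
# the matching "-vector" index. The helper below is a hypothetical stand-in.
def upload_chunk_to_vector_index(doc_id, chunk, vector, location):
    ...  # would push {id, chunk, chunkVector, location} into the vector index

for doc_id, value in ordered_results.items():
    vector = embedder.embed_query(value["chunk"])   # one embedding per chunk
    upload_chunk_to_vector_index(doc_id, value["chunk"], vector, value["location"])
```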
{
@@ -1124,8 +1133,8 @@
"Vectorizing 1 chunks from Document: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5809586/\n",
"Vectorizing 1 chunks from Document: https://www.ncbi.nlm.nih.gov/pubmed/17414124/\n",
"Vectorizing 1 chunks from Document: https://doi.org/10.1111/bjh.14134; https://www.ncbi.nlm.nih.gov/pubmed/27173746/\n",
"CPU times: user 10.8 s, sys: 165 ms, total: 10.9 s\n",
"Wall time: 26.7 s\n"
"CPU times: user 8.02 s, sys: 174 ms, total: 8.2 s\n",
"Wall time: 23.3 s\n"
]
}
],
@@ -1205,19 +1214,28 @@
"execution_count": 16,
"id": "61098bb4-33da-4eb4-94cf-503587337aca",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of results: 3\n"
]
}
],
"source": [
"vector_indexes = [index+\"-vector\" for index in indexes]\n",
"\n",
"agg_search_results = get_search_results(QUESTION, vector_indexes,\n",
" k=10, # Number of results per vector index\n",
"k = 10\n",
"similarity_k = 3\n",
"ordered_results = get_search_results(QUESTION, vector_indexes,\n",
" k=k, # Number of results per vector index\n",
" reranker_threshold=1,\n",
" vector_search=True, \n",
" similarity_k=similarity_k,\n",
" query_vector = embedder.embed_query(QUESTION)\n",
" )\n",
"ordered_results = order_search_results(agg_search_results, \n",
" k=3, # Number of top results combined \n",
" reranker_threshold=1,\n",
" vector_search = True)"
"print(\"Number of results:\",len(ordered_results))"
]
},
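
Note the two knobs in the merged call, as we read them from this call site and its output: `k` caps the hits fetched from each vector index, while `similarity_k` caps the final merged set, which is why only 3 results come back here. A tiny sanity check under that reading:

```python
# k            -> max hits fetched from EACH "-vector" index (here 10)
# similarity_k -> max hits kept AFTER merging/reranking across indexes (here 3)
assert len(ordered_results) <= similarity_k   # output above: 3 results
```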
{
@@ -1246,7 +1264,7 @@
"top_docs = []\n",
"for key,value in ordered_results.items():\n",
" location = value[\"location\"] if value[\"location\"] is not None else \"\"\n",
" top_docs.append(Document(page_content=value[\"chunk\"], metadata={\"source\": location+os.environ['BLOB_SAS_TOKEN']}))\n",
" top_docs.append(Document(page_content=value[\"chunk\"], metadata={\"source\": location}))\n",
" \n",
"print(\"Number of chunks:\",len(top_docs))"
]
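
For reference, each `ordered_results` value is expected to expose at least `chunk` and `location`, and the `source` metadata set here later drives the inline citations in the answer. An illustration with made-up values (not real search output):

```python
# How one ordered_results entry turns into a LangChain Document for the chain.
value = {"chunk": "CLP stands for ...", "location": "https://example.com/article"}
location = value["location"] if value["location"] is not None else ""
doc = Document(page_content=value["chunk"], metadata={"source": location})
```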
@@ -1261,8 +1279,12 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Custom token limit for gpt-35-turbo : 2000\n",
"Combined docs tokens count: 4123\n",
"System prompt token count: 1669\n",
"Max Completion Token count: 1000\n",
"Combined docs (context) token count: 1938\n",
"--------\n",
"Requested token count: 4607\n",
"Token limit for gpt-35-turbo : 4096\n",
"Chain Type selected: map_reduce\n"
]
}
@@ -1271,10 +1293,19 @@
"# Calculate number of tokens of our docs\n",
"if(len(top_docs)>0):\n",
" tokens_limit = model_tokens_limit(MODEL) # this is a custom function we created in common/utils.py\n",
" num_tokens = num_tokens_from_docs(top_docs) # this is a custom function we created in common/utils.py\n",
" chain_type = \"map_reduce\" if num_tokens > tokens_limit else \"stuff\" \n",
" print(\"Custom token limit for\", MODEL, \":\", tokens_limit)\n",
" print(\"Combined docs tokens count:\",num_tokens)\n",
" prompt_tokens = num_tokens_from_string(COMBINE_PROMPT_TEMPLATE) # this is a custom function we created in common/utils.py\n",
" context_tokens = num_tokens_from_docs(top_docs) # this is a custom function we created in common/utils.py\n",
" \n",
" requested_tokens = prompt_tokens + context_tokens + COMPLETION_TOKENS\n",
" \n",
" chain_type = \"map_reduce\" if requested_tokens > 0.9 * tokens_limit else \"stuff\" \n",
" \n",
" print(\"System prompt token count:\",prompt_tokens)\n",
" print(\"Max Completion Token count:\", COMPLETION_TOKENS)\n",
" print(\"Combined docs (context) token count:\",context_tokens)\n",
" print(\"--------\")\n",
" print(\"Requested token count:\",requested_tokens)\n",
" print(\"Token limit for\", MODEL, \":\", tokens_limit)\n",
" print(\"Chain Type selected:\", chain_type)\n",
" \n",
"else:\n",
@@ -1316,8 +1347,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 18.4 ms, sys: 0 ns, total: 18.4 ms\n",
"Wall time: 11.1 s\n"
"CPU times: user 17 ms, sys: 0 ns, total: 17 ms\n",
"Wall time: 4.58 s\n"
]
}
],
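
The cell that produced this timing is collapsed in the diff. A minimal sketch of what running the chain presumably looks like, assuming the map_reduce path and the prompts imported from `common.prompts` at the top of the notebook (an assumption, not the notebook's exact code):

```python
# Sketch: build and run the QA-with-sources chain over the retrieved docs.
chain = load_qa_with_sources_chain(llm, chain_type=chain_type,
                                   question_prompt=COMBINE_QUESTION_PROMPT,
                                   combine_prompt=COMBINE_PROMPT)
response = chain({"input_documents": top_docs, "question": QUESTION})
answer = response["output_text"]        # markdown answer with citations
```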
@@ -1336,7 +1367,7 @@
{
"data": {
"text/markdown": [
"CLP stands for Constraint Logic Programming. It is a framework that combines logic programming with constraints to solve problems. In CLP, programs are written in a logic programming language and constraints are used to restrict the possible solutions. CLP allows for the specification of constraints on variables and the use of constraint solvers to find solutions that satisfy these constraints<sup><a href=\"https://demodatasetsp.blob.core.windows.net/arxivcs/0508/0508106v1.pdf?sv=2022-11-02&ss=bf&srt=sco&sp=rltfx&se=2024-10-02T01:02:07Z&st=2023-08-03T17:02:07Z&spr=https&sig=gLxStXFSY6X29OPpPDpBEhoQDdtJNDrMVExNYJ%2BhmBQ%3D\" target=\"_blank\">[3]</a></sup>."
"CLP can refer to different things depending on the context. In the context of the provided information, CLP stands for Consultation-Liaison Psychiatry<sup><a href=\"https://api.elsevier.com/content/article/pii/S0033318220301420\" target=\"_blank\">[2]</a></sup>."
],
"text/plain": [
"<IPython.core.display.Markdown object>"
@@ -1355,7 +1386,7 @@
"id": "05e27c75-bfd9-4304-b2fd-c8e30bcc0558",
"metadata": {},
"source": [
"**Please Note**: There are instances where, despite the answer's high accuracy and quality, the references are not done according to the instructions provided in the COMBINE_PROMPT. This behavior is anticipated when dealing with GPT-3.5 models. We will provide a more detailed explanation of this phenomenon towards the conclusion of Notebook 5."
"**Please Note**: There are some instances where, despite the answer's high accuracy and quality, the references are not done according to the instructions provided in the COMBINE_PROMPT. This behavior is anticipated when dealing with GPT-3.5 models. We will provide a more detailed explanation of this phenomenon towards the conclusion of Notebook 5."
]
},
{