diff --git a/404.html b/404.html index 611f44d491..a6c7711333 100644 --- a/404.html +++ b/404.html @@ -16,13 +16,13 @@ - +
Skip to main content

Sorry! Page Not Found

We have been doing some work on our website, chances are that the page you're looking for is in the new docs section.

- + \ No newline at end of file diff --git a/assets/js/120352d6.0e62ec09.js b/assets/js/120352d6.0e62ec09.js new file mode 100644 index 0000000000..cbafdd864c --- /dev/null +++ b/assets/js/120352d6.0e62ec09.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[89262],{3905:(e,n,t)=>{t.d(n,{Zo:()=>c,kt:()=>m});var r=t(67294);function a(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);n&&(r=r.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,r)}return t}function s(e){for(var n=1;n=0||(a[t]=e[t]);return a}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(a[t]=e[t])}return a}var l=r.createContext({}),p=function(e){var n=r.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):s(s({},n),e)),t},c=function(e){var n=p(e.components);return r.createElement(l.Provider,{value:n},e.children)},u={inlineCode:"code",wrapper:function(e){var n=e.children;return r.createElement(r.Fragment,{},n)}},d=r.forwardRef((function(e,n){var t=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,c=i(e,["components","mdxType","originalType","parentName"]),d=p(t),m=a,h=d["".concat(l,".").concat(m)]||d[m]||u[m]||o;return t?r.createElement(h,s(s({ref:n},c),{},{components:t})):r.createElement(h,s({ref:n},c))}));function m(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,s=new Array(o);s[0]=d;var i={};for(var l in n)hasOwnProperty.call(n,l)&&(i[l]=n[l]);i.originalType=e,i.mdxType="string"==typeof e?e:a,s[1]=i;for(var p=2;p{t.r(n),t.d(n,{assets:()=>l,contentTitle:()=>s,default:()=>u,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=t(83117),a=(t(67294),t(3905));const o={title:"Quickstart - Understand and Search Forms",hide_title:!0,status:"stable"},s="Tutorial: Create a custom search engine and question-answering system",i={unversionedId:"Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms",id:"Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms",title:"Quickstart - Understand and Search Forms",description:"In this tutorial, learn how to index and query large data loaded from a Spark cluster. You set up a Jupyter Notebook that performs the following actions:",source:"@site/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.md",sourceDirName:"Explore Algorithms/OpenAI",slug:"/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms",draft:!1,tags:[],version:"current",frontMatter:{title:"Quickstart - Understand and Search Forms",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Quickstart - OpenAI Embedding and GPU based KNN",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN"},next:{title:"Getting Started",permalink:"/SynapseML/docs/next/Explore Algorithms/Deep Learning/Getting Started"}},l={},p=[{value:"1 - Set up dependencies",id:"1---set-up-dependencies",level:2},{value:"2 - Load data into Spark",id:"2---load-data-into-spark",level:2},{value:"3 - Apply form recognition",id:"3---apply-form-recognition",level:2},{value:"4 - Simplify form recognition output",id:"4---simplify-form-recognition-output",level:2},{value:"5 - Add translations",id:"5---add-translations",level:2},{value:"6 - Translate products to emojis with OpenAI \ud83e\udd2f",id:"6---translate-products-to-emojis-with-openai-",level:2},{value:"7 - Infer vendor address continent with OpenAI",id:"7---infer-vendor-address-continent-with-openai",level:2},{value:"8 - Create an Azure Search Index for the Forms",id:"8---create-an-azure-search-index-for-the-forms",level:2},{value:"9 - Try out a search query",id:"9---try-out-a-search-query",level:2},{value:"10 - Build a chatbot that can use Azure Search as a tool \ud83e\udde0\ud83d\udd27",id:"10---build-a-chatbot-that-can-use-azure-search-as-a-tool-",level:2},{value:"11 - Asking our chatbot a question",id:"11---asking-our-chatbot-a-question",level:2},{value:"12 - A quick double check",id:"12---a-quick-double-check",level:2}],c={toc:p};function u(e){let{components:n,...t}=e;return(0,a.kt)("wrapper",(0,r.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,a.kt)("h1",{id:"tutorial-create-a-custom-search-engine-and-question-answering-system"},"Tutorial: Create a custom search engine and question-answering system"),(0,a.kt)("p",null,"In this tutorial, learn how to index and query large data loaded from a Spark cluster. You set up a Jupyter Notebook that performs the following actions:"),(0,a.kt)("blockquote",null,(0,a.kt)("ul",{parentName:"blockquote"},(0,a.kt)("li",{parentName:"ul"},"Load various forms (invoices) into a data frame in an Apache Spark session"),(0,a.kt)("li",{parentName:"ul"},"Analyze them to determine their features"),(0,a.kt)("li",{parentName:"ul"},"Assemble the resulting output into a tabular data structure"),(0,a.kt)("li",{parentName:"ul"},"Write the output to a search index hosted in Azure Cognitive Search"),(0,a.kt)("li",{parentName:"ul"},"Explore and query over the content you created"))),(0,a.kt)("h2",{id:"1---set-up-dependencies"},"1 - Set up dependencies"),(0,a.kt)("p",null,"We start by importing packages and connecting to the Azure resources used in this workflow."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},"%pip install openai==0.28.1\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.core.platform import find_secret\n\ncognitive_key = find_secret(\n secret_name="ai-services-api-key", keyvault="mmlspark-build-keys"\n) # Replace the call to find_secret with your key as a python string. e.g. cognitive_key="27snaiw..."\ncognitive_location = "eastus"\n\ntranslator_key = find_secret(\n secret_name="translator-key", keyvault="mmlspark-build-keys"\n) # Replace the call to find_secret with your key as a python string.\ntranslator_location = "eastus"\n\nsearch_key = find_secret(\n secret_name="azure-search-key", keyvault="mmlspark-build-keys"\n) # Replace the call to find_secret with your key as a python string.\nsearch_service = "mmlspark-azure-search"\nsearch_index = "form-demo-index-5"\n\nopenai_key = find_secret(\n secret_name="openai-api-key-2", keyvault="mmlspark-build-keys"\n) # Replace the call to find_secret with your key as a python string.\nopenai_service_name = "synapseml-openai-2"\nopenai_deployment_name = "gpt-35-turbo"\nopenai_url = f"https://{openai_service_name}.openai.azure.com/"\n')),(0,a.kt)("h2",{id:"2---load-data-into-spark"},"2 - Load data into Spark"),(0,a.kt)("p",null,"This code loads a few external files from an Azure storage account that's used for demo purposes. The files are various invoices, and they're read into a data frame."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import udf\nfrom pyspark.sql.types import StringType\n\n\ndef blob_to_url(blob):\n [prefix, postfix] = blob.split("@")\n container = prefix.split("/")[-1]\n split_postfix = postfix.split("/")\n account = split_postfix[0]\n filepath = "/".join(split_postfix[1:])\n return "https://{}/{}/{}".format(account, container, filepath)\n\n\ndf2 = (\n spark.read.format("binaryFile")\n .load("wasbs://publicwasb@mmlspark.blob.core.windows.net/form_subset/*")\n .select("path")\n .limit(10)\n .select(udf(blob_to_url, StringType())("path").alias("url"))\n .cache()\n)\n\ndisplay(df2)\n')),(0,a.kt)("img",{src:"https://mmlspark.blob.core.windows.net/graphics/Invoice11205.svg",width:"40%"}),(0,a.kt)("h2",{id:"3---apply-form-recognition"},"3 - Apply form recognition"),(0,a.kt)("p",null,"This code loads the ",(0,a.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/AI%20Services/Overview/#form-recognizer"},"AnalyzeInvoices transformer")," and passes a reference to the data frame containing the invoices. It calls the pre-built invoice model of Azure Forms Analyzer."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.form import AnalyzeInvoices\n\nanalyzed_df = (\n AnalyzeInvoices()\n .setSubscriptionKey(cognitive_key)\n .setLocation(cognitive_location)\n .setImageUrlCol("url")\n .setOutputCol("invoices")\n .setErrorCol("errors")\n .setConcurrency(5)\n .transform(df2)\n .cache()\n)\n\ndisplay(analyzed_df)\n')),(0,a.kt)("h2",{id:"4---simplify-form-recognition-output"},"4 - Simplify form recognition output"),(0,a.kt)("p",null,"This code uses the ",(0,a.kt)("a",{parentName:"p",href:"https://mmlspark.blob.core.windows.net/docs/1.0.4/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.FormOntologyTransformer"},"FormOntologyLearner"),", a transformer that analyzes the output of Form Recognizer transformers (for Azure AI Document Intelligence) and infers a tabular data structure. The output of AnalyzeInvoices is dynamic and varies based on the features detected in your content."),(0,a.kt)("p",null,"FormOntologyLearner extends the utility of the AnalyzeInvoices transformer by looking for patterns that can be used to create a tabular data structure. Organizing the output into multiple columns and rows makes for simpler downstream analysis."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.form import FormOntologyLearner\n\norganized_df = (\n FormOntologyLearner()\n .setInputCol("invoices")\n .setOutputCol("extracted")\n .fit(analyzed_df)\n .transform(analyzed_df)\n .select("url", "extracted.*")\n .cache()\n)\n\ndisplay(organized_df)\n')),(0,a.kt)("p",null,"With our nice tabular dataframe, we can flatten the nested tables found in the forms with some SparkSQL"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import explode, col\n\nitemized_df = (\n organized_df.select("*", explode(col("Items")).alias("Item"))\n .drop("Items")\n .select("Item.*", "*")\n .drop("Item")\n)\n\ndisplay(itemized_df)\n')),(0,a.kt)("h2",{id:"5---add-translations"},"5 - Add translations"),(0,a.kt)("p",null,"This code loads ",(0,a.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/AI%20Services/Overview/#translation"},"Translate"),', a transformer that calls the Azure AI Translator service in Azure AI services. The original text, which is in English in the "Description" column, is machine-translated into various languages. All of the output is consolidated into "output.translations" array.'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.translate import Translate\n\ntranslated_df = (\n Translate()\n .setSubscriptionKey(translator_key)\n .setLocation(translator_location)\n .setTextCol("Description")\n .setErrorCol("TranslationError")\n .setOutputCol("output")\n .setToLanguage(["zh-Hans", "fr", "ru", "cy"])\n .setConcurrency(5)\n .transform(itemized_df)\n .withColumn("Translations", col("output.translations")[0])\n .drop("output", "TranslationError")\n .cache()\n)\n\ndisplay(translated_df)\n')),(0,a.kt)("h2",{id:"6---translate-products-to-emojis-with-openai-"},"6 - Translate products to emojis with OpenAI \ud83e\udd2f"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIPrompt\nfrom pyspark.sql.functions import trim, split\n\nemoji_template = """ \n Your job is to translate item names into emoji. Do not add anything but the emoji and end the translation with a comma\n \n Two Ducks: \ud83e\udd86\ud83e\udd86,\n Light Bulb: \ud83d\udca1,\n Three Peaches: \ud83c\udf51\ud83c\udf51\ud83c\udf51,\n Two kitchen stoves: \u2668\ufe0f\u2668\ufe0f,\n A red car: \ud83d\ude97,\n A person and a cat: \ud83e\uddcd\ud83d\udc08,\n A {Description}: """\n\nprompter = (\n OpenAIPrompt()\n .setSubscriptionKey(openai_key)\n .setDeploymentName(openai_deployment_name)\n .setUrl(openai_url)\n .setMaxTokens(5)\n .setPromptTemplate(emoji_template)\n .setErrorCol("error")\n .setOutputCol("Emoji")\n)\n\nemoji_df = (\n prompter.transform(translated_df)\n .withColumn("Emoji", trim(split(col("Emoji"), ",").getItem(0)))\n .drop("error", "prompt")\n .cache()\n)\n')),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'display(emoji_df.select("Description", "Emoji"))\n')),(0,a.kt)("h2",{id:"7---infer-vendor-address-continent-with-openai"},"7 - Infer vendor address continent with OpenAI"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'continent_template = """\nWhich continent does the following address belong to? \n\nPick one value from Europe, Australia, North America, South America, Asia, Africa, Antarctica. \n\nDont respond with anything but one of the above. If you don\'t know the answer or cannot figure it out from the text, return None. End your answer with a comma.\n\nAddress: "6693 Ryan Rd, North Whales",\nContinent: Europe,\nAddress: "6693 Ryan Rd",\nContinent: None,\nAddress: "{VendorAddress}",\nContinent:"""\n\ncontinent_df = (\n prompter.setOutputCol("Continent")\n .setPromptTemplate(continent_template)\n .transform(emoji_df)\n .withColumn("Continent", trim(split(col("Continent"), ",").getItem(0)))\n .drop("error", "prompt")\n .cache()\n)\n')),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'display(continent_df.select("VendorAddress", "Continent"))\n')),(0,a.kt)("h2",{id:"8---create-an-azure-search-index-for-the-forms"},"8 - Create an Azure Search Index for the Forms"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services import *\nfrom pyspark.sql.functions import monotonically_increasing_id, lit\n\n(\n continent_df.withColumn("DocID", monotonically_increasing_id().cast("string"))\n .withColumn("SearchAction", lit("upload"))\n .writeToAzureSearch(\n subscriptionKey=search_key,\n actionCol="SearchAction",\n serviceName=search_service,\n indexName=search_index,\n keyCol="DocID",\n )\n)\n')),(0,a.kt)("h2",{id:"9---try-out-a-search-query"},"9 - Try out a search query"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import requests\n\nsearch_url = "https://{}.search.windows.net/indexes/{}/docs/search?api-version=2019-05-06".format(\n search_service, search_index\n)\nrequests.post(\n search_url, json={"search": "door"}, headers={"api-key": search_key}\n).json()\n')),(0,a.kt)("h2",{id:"10---build-a-chatbot-that-can-use-azure-search-as-a-tool-"},"10 - Build a chatbot that can use Azure Search as a tool \ud83e\udde0\ud83d\udd27"),(0,a.kt)("img",{src:"https://mmlspark.blob.core.windows.net/graphics/notebooks/chatbot_flow_2.svg",width:"40%"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import json\nimport openai\n\nopenai.api_type = "azure"\nopenai.api_base = openai_url\nopenai.api_key = openai_key\nopenai.api_version = "2023-03-15-preview"\n\nchat_context_prompt = f"""\nYou are a chatbot designed to answer questions with the help of a search engine that has the following information:\n\n{continent_df.columns}\n\nIf you dont know the answer to a question say "I dont know". Do not lie or hallucinate information. Be brief. If you need to use the search engine to solve the please output a json in the form of {{"query": "example_query"}}\n"""\n\n\ndef search_query_prompt(question):\n return f"""\nGiven the search engine above, what would you search for to answer the following question?\n\nQuestion: "{question}"\n\nPlease output a json in the form of {{"query": "example_query"}}\n"""\n\n\ndef search_result_prompt(query):\n search_results = requests.post(\n search_url, json={"search": query}, headers={"api-key": search_key}\n ).json()\n return f"""\n\nYou previously ran a search for "{query}" which returned the following results:\n\n{search_results}\n\nYou should use the results to help you answer questions. If you dont know the answer to a question say "I dont know". Do not lie or hallucinate information. Be Brief and mention which query you used to solve the problem. \n"""\n\n\ndef prompt_gpt(messages):\n response = openai.ChatCompletion.create(\n engine=openai_deployment_name, messages=messages, max_tokens=None, top_p=0.95\n )\n return response["choices"][0]["message"]["content"]\n\n\ndef custom_chatbot(question):\n while True:\n try:\n query = json.loads(\n prompt_gpt(\n [\n {"role": "system", "content": chat_context_prompt},\n {"role": "user", "content": search_query_prompt(question)},\n ]\n )\n )["query"]\n\n return prompt_gpt(\n [\n {"role": "system", "content": chat_context_prompt},\n {"role": "system", "content": search_result_prompt(query)},\n {"role": "user", "content": question},\n ]\n )\n except Exception as e:\n raise e\n')),(0,a.kt)("h2",{id:"11---asking-our-chatbot-a-question"},"11 - Asking our chatbot a question"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'custom_chatbot("What did Luke Diaz buy?")\n')),(0,a.kt)("h2",{id:"12---a-quick-double-check"},"12 - A quick double check"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'display(\n continent_df.where(col("CustomerName") == "Luke Diaz")\n .select("Description")\n .distinct()\n)\n')))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/120352d6.7b9ebd97.js b/assets/js/120352d6.7b9ebd97.js deleted file mode 100644 index 83692018c7..0000000000 --- a/assets/js/120352d6.7b9ebd97.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[89262],{3905:(e,n,t)=>{t.d(n,{Zo:()=>c,kt:()=>m});var r=t(67294);function a(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function o(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);n&&(r=r.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,r)}return t}function s(e){for(var n=1;n=0||(a[t]=e[t]);return a}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(a[t]=e[t])}return a}var l=r.createContext({}),p=function(e){var n=r.useContext(l),t=n;return e&&(t="function"==typeof e?e(n):s(s({},n),e)),t},c=function(e){var n=p(e.components);return r.createElement(l.Provider,{value:n},e.children)},u={inlineCode:"code",wrapper:function(e){var n=e.children;return r.createElement(r.Fragment,{},n)}},d=r.forwardRef((function(e,n){var t=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,c=i(e,["components","mdxType","originalType","parentName"]),d=p(t),m=a,h=d["".concat(l,".").concat(m)]||d[m]||u[m]||o;return t?r.createElement(h,s(s({ref:n},c),{},{components:t})):r.createElement(h,s({ref:n},c))}));function m(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,s=new Array(o);s[0]=d;var i={};for(var l in n)hasOwnProperty.call(n,l)&&(i[l]=n[l]);i.originalType=e,i.mdxType="string"==typeof e?e:a,s[1]=i;for(var p=2;p{t.r(n),t.d(n,{assets:()=>l,contentTitle:()=>s,default:()=>u,frontMatter:()=>o,metadata:()=>i,toc:()=>p});var r=t(83117),a=(t(67294),t(3905));const o={title:"Quickstart - Understand and Search Forms",hide_title:!0,status:"stable"},s="Tutorial: Create a custom search engine and question-answering system",i={unversionedId:"Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms",id:"Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms",title:"Quickstart - Understand and Search Forms",description:"In this tutorial, learn how to index and query large data loaded from a Spark cluster. You set up a Jupyter Notebook that performs the following actions:",source:"@site/docs/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms.md",sourceDirName:"Explore Algorithms/OpenAI",slug:"/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms",draft:!1,tags:[],version:"current",frontMatter:{title:"Quickstart - Understand and Search Forms",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Quickstart - OpenAI Embedding and GPU based KNN",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN"},next:{title:"Getting Started",permalink:"/SynapseML/docs/next/Explore Algorithms/Deep Learning/Getting Started"}},l={},p=[{value:"1 - Set up dependencies",id:"1---set-up-dependencies",level:2},{value:"2 - Load data into Spark",id:"2---load-data-into-spark",level:2},{value:"3 - Apply form recognition",id:"3---apply-form-recognition",level:2},{value:"4 - Simplify form recognition output",id:"4---simplify-form-recognition-output",level:2},{value:"5 - Add translations",id:"5---add-translations",level:2},{value:"6 - Translate products to emojis with OpenAI \ud83e\udd2f",id:"6---translate-products-to-emojis-with-openai-",level:2},{value:"7 - Infer vendor address continent with OpenAI",id:"7---infer-vendor-address-continent-with-openai",level:2},{value:"8 - Create an Azure Search Index for the Forms",id:"8---create-an-azure-search-index-for-the-forms",level:2},{value:"9 - Try out a search query",id:"9---try-out-a-search-query",level:2},{value:"10 - Build a chatbot that can use Azure Search as a tool \ud83e\udde0\ud83d\udd27",id:"10---build-a-chatbot-that-can-use-azure-search-as-a-tool-",level:2},{value:"11 - Asking our chatbot a question",id:"11---asking-our-chatbot-a-question",level:2},{value:"12 - A quick double check",id:"12---a-quick-double-check",level:2}],c={toc:p};function u(e){let{components:n,...t}=e;return(0,a.kt)("wrapper",(0,r.Z)({},c,t,{components:n,mdxType:"MDXLayout"}),(0,a.kt)("h1",{id:"tutorial-create-a-custom-search-engine-and-question-answering-system"},"Tutorial: Create a custom search engine and question-answering system"),(0,a.kt)("p",null,"In this tutorial, learn how to index and query large data loaded from a Spark cluster. You set up a Jupyter Notebook that performs the following actions:"),(0,a.kt)("blockquote",null,(0,a.kt)("ul",{parentName:"blockquote"},(0,a.kt)("li",{parentName:"ul"},"Load various forms (invoices) into a data frame in an Apache Spark session"),(0,a.kt)("li",{parentName:"ul"},"Analyze them to determine their features"),(0,a.kt)("li",{parentName:"ul"},"Assemble the resulting output into a tabular data structure"),(0,a.kt)("li",{parentName:"ul"},"Write the output to a search index hosted in Azure Cognitive Search"),(0,a.kt)("li",{parentName:"ul"},"Explore and query over the content you created"))),(0,a.kt)("h2",{id:"1---set-up-dependencies"},"1 - Set up dependencies"),(0,a.kt)("p",null,"We start by importing packages and connecting to the Azure resources used in this workflow."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},"%pip install openai==0.28.1\n")),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.core.platform import find_secret\n\ncognitive_key = find_secret(\n secret_name="ai-services-api-key", keyvault="mmlspark-build-keys"\n) # Replace the call to find_secret with your key as a python string. e.g. cognitive_key="27snaiw..."\ncognitive_location = "eastus"\n\ntranslator_key = find_secret(\n secret_name="translator-key", keyvault="mmlspark-build-keys"\n) # Replace the call to find_secret with your key as a python string.\ntranslator_location = "eastus"\n\nsearch_key = find_secret(\n secret_name="azure-search-key", keyvault="mmlspark-build-keys"\n) # Replace the call to find_secret with your key as a python string.\nsearch_service = "mmlspark-azure-search"\nsearch_index = "form-demo-index-5"\n\nopenai_key = find_secret(\n secret_name="openai-api-key", keyvault="mmlspark-build-keys"\n) # Replace the call to find_secret with your key as a python string.\nopenai_service_name = "synapseml-openai"\nopenai_deployment_name = "gpt-35-turbo"\nopenai_url = f"https://{openai_service_name}.openai.azure.com/"\n')),(0,a.kt)("h2",{id:"2---load-data-into-spark"},"2 - Load data into Spark"),(0,a.kt)("p",null,"This code loads a few external files from an Azure storage account that's used for demo purposes. The files are various invoices, and they're read into a data frame."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import udf\nfrom pyspark.sql.types import StringType\n\n\ndef blob_to_url(blob):\n [prefix, postfix] = blob.split("@")\n container = prefix.split("/")[-1]\n split_postfix = postfix.split("/")\n account = split_postfix[0]\n filepath = "/".join(split_postfix[1:])\n return "https://{}/{}/{}".format(account, container, filepath)\n\n\ndf2 = (\n spark.read.format("binaryFile")\n .load("wasbs://ignite2021@mmlsparkdemo.blob.core.windows.net/form_subset/*")\n .select("path")\n .limit(10)\n .select(udf(blob_to_url, StringType())("path").alias("url"))\n .cache()\n)\n\ndisplay(df2)\n')),(0,a.kt)("img",{src:"https://mmlsparkdemo.blob.core.windows.net/ignite2021/form_svgs/Invoice11205.svg",width:"40%"}),(0,a.kt)("h2",{id:"3---apply-form-recognition"},"3 - Apply form recognition"),(0,a.kt)("p",null,"This code loads the ",(0,a.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/AI%20Services/Overview/#form-recognizer"},"AnalyzeInvoices transformer")," and passes a reference to the data frame containing the invoices. It calls the pre-built invoice model of Azure Forms Analyzer."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.form import AnalyzeInvoices\n\nanalyzed_df = (\n AnalyzeInvoices()\n .setSubscriptionKey(cognitive_key)\n .setLocation(cognitive_location)\n .setImageUrlCol("url")\n .setOutputCol("invoices")\n .setErrorCol("errors")\n .setConcurrency(5)\n .transform(df2)\n .cache()\n)\n\ndisplay(analyzed_df)\n')),(0,a.kt)("h2",{id:"4---simplify-form-recognition-output"},"4 - Simplify form recognition output"),(0,a.kt)("p",null,"This code uses the ",(0,a.kt)("a",{parentName:"p",href:"https://mmlspark.blob.core.windows.net/docs/1.0.4/pyspark/synapse.ml.services.form.html#module-synapse.ml.services.form.FormOntologyTransformer"},"FormOntologyLearner"),", a transformer that analyzes the output of Form Recognizer transformers (for Azure AI Document Intelligence) and infers a tabular data structure. The output of AnalyzeInvoices is dynamic and varies based on the features detected in your content."),(0,a.kt)("p",null,"FormOntologyLearner extends the utility of the AnalyzeInvoices transformer by looking for patterns that can be used to create a tabular data structure. Organizing the output into multiple columns and rows makes for simpler downstream analysis."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.form import FormOntologyLearner\n\norganized_df = (\n FormOntologyLearner()\n .setInputCol("invoices")\n .setOutputCol("extracted")\n .fit(analyzed_df)\n .transform(analyzed_df)\n .select("url", "extracted.*")\n .cache()\n)\n\ndisplay(organized_df)\n')),(0,a.kt)("p",null,"With our nice tabular dataframe, we can flatten the nested tables found in the forms with some SparkSQL"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import explode, col\n\nitemized_df = (\n organized_df.select("*", explode(col("Items")).alias("Item"))\n .drop("Items")\n .select("Item.*", "*")\n .drop("Item")\n)\n\ndisplay(itemized_df)\n')),(0,a.kt)("h2",{id:"5---add-translations"},"5 - Add translations"),(0,a.kt)("p",null,"This code loads ",(0,a.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/AI%20Services/Overview/#translation"},"Translate"),', a transformer that calls the Azure AI Translator service in Azure AI services. The original text, which is in English in the "Description" column, is machine-translated into various languages. All of the output is consolidated into "output.translations" array.'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.translate import Translate\n\ntranslated_df = (\n Translate()\n .setSubscriptionKey(translator_key)\n .setLocation(translator_location)\n .setTextCol("Description")\n .setErrorCol("TranslationError")\n .setOutputCol("output")\n .setToLanguage(["zh-Hans", "fr", "ru", "cy"])\n .setConcurrency(5)\n .transform(itemized_df)\n .withColumn("Translations", col("output.translations")[0])\n .drop("output", "TranslationError")\n .cache()\n)\n\ndisplay(translated_df)\n')),(0,a.kt)("h2",{id:"6---translate-products-to-emojis-with-openai-"},"6 - Translate products to emojis with OpenAI \ud83e\udd2f"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIPrompt\nfrom pyspark.sql.functions import trim, split\n\nemoji_template = """ \n Your job is to translate item names into emoji. Do not add anything but the emoji and end the translation with a comma\n \n Two Ducks: \ud83e\udd86\ud83e\udd86,\n Light Bulb: \ud83d\udca1,\n Three Peaches: \ud83c\udf51\ud83c\udf51\ud83c\udf51,\n Two kitchen stoves: \u2668\ufe0f\u2668\ufe0f,\n A red car: \ud83d\ude97,\n A person and a cat: \ud83e\uddcd\ud83d\udc08,\n A {Description}: """\n\nprompter = (\n OpenAIPrompt()\n .setSubscriptionKey(openai_key)\n .setDeploymentName(openai_deployment_name)\n .setUrl(openai_url)\n .setMaxTokens(5)\n .setPromptTemplate(emoji_template)\n .setErrorCol("error")\n .setOutputCol("Emoji")\n)\n\nemoji_df = (\n prompter.transform(translated_df)\n .withColumn("Emoji", trim(split(col("Emoji"), ",").getItem(0)))\n .drop("error", "prompt")\n .cache()\n)\n')),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'display(emoji_df.select("Description", "Emoji"))\n')),(0,a.kt)("h2",{id:"7---infer-vendor-address-continent-with-openai"},"7 - Infer vendor address continent with OpenAI"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'continent_template = """\nWhich continent does the following address belong to? \n\nPick one value from Europe, Australia, North America, South America, Asia, Africa, Antarctica. \n\nDont respond with anything but one of the above. If you don\'t know the answer or cannot figure it out from the text, return None. End your answer with a comma.\n\nAddress: "6693 Ryan Rd, North Whales",\nContinent: Europe,\nAddress: "6693 Ryan Rd",\nContinent: None,\nAddress: "{VendorAddress}",\nContinent:"""\n\ncontinent_df = (\n prompter.setOutputCol("Continent")\n .setPromptTemplate(continent_template)\n .transform(emoji_df)\n .withColumn("Continent", trim(split(col("Continent"), ",").getItem(0)))\n .drop("error", "prompt")\n .cache()\n)\n')),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'display(continent_df.select("VendorAddress", "Continent"))\n')),(0,a.kt)("h2",{id:"8---create-an-azure-search-index-for-the-forms"},"8 - Create an Azure Search Index for the Forms"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services import *\nfrom pyspark.sql.functions import monotonically_increasing_id, lit\n\n(\n continent_df.withColumn("DocID", monotonically_increasing_id().cast("string"))\n .withColumn("SearchAction", lit("upload"))\n .writeToAzureSearch(\n subscriptionKey=search_key,\n actionCol="SearchAction",\n serviceName=search_service,\n indexName=search_index,\n keyCol="DocID",\n )\n)\n')),(0,a.kt)("h2",{id:"9---try-out-a-search-query"},"9 - Try out a search query"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import requests\n\nsearch_url = "https://{}.search.windows.net/indexes/{}/docs/search?api-version=2019-05-06".format(\n search_service, search_index\n)\nrequests.post(\n search_url, json={"search": "door"}, headers={"api-key": search_key}\n).json()\n')),(0,a.kt)("h2",{id:"10---build-a-chatbot-that-can-use-azure-search-as-a-tool-"},"10 - Build a chatbot that can use Azure Search as a tool \ud83e\udde0\ud83d\udd27"),(0,a.kt)("img",{src:"https://mmlspark.blob.core.windows.net/graphics/notebooks/chatbot_flow_2.svg",width:"40%"}),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import json\nimport openai\n\nopenai.api_type = "azure"\nopenai.api_base = openai_url\nopenai.api_key = openai_key\nopenai.api_version = "2023-03-15-preview"\n\nchat_context_prompt = f"""\nYou are a chatbot designed to answer questions with the help of a search engine that has the following information:\n\n{continent_df.columns}\n\nIf you dont know the answer to a question say "I dont know". Do not lie or hallucinate information. Be brief. If you need to use the search engine to solve the please output a json in the form of {{"query": "example_query"}}\n"""\n\n\ndef search_query_prompt(question):\n return f"""\nGiven the search engine above, what would you search for to answer the following question?\n\nQuestion: "{question}"\n\nPlease output a json in the form of {{"query": "example_query"}}\n"""\n\n\ndef search_result_prompt(query):\n search_results = requests.post(\n search_url, json={"search": query}, headers={"api-key": search_key}\n ).json()\n return f"""\n\nYou previously ran a search for "{query}" which returned the following results:\n\n{search_results}\n\nYou should use the results to help you answer questions. If you dont know the answer to a question say "I dont know". Do not lie or hallucinate information. Be Brief and mention which query you used to solve the problem. \n"""\n\n\ndef prompt_gpt(messages):\n response = openai.ChatCompletion.create(\n engine=openai_deployment_name, messages=messages, max_tokens=None, top_p=0.95\n )\n return response["choices"][0]["message"]["content"]\n\n\ndef custom_chatbot(question):\n while True:\n try:\n query = json.loads(\n prompt_gpt(\n [\n {"role": "system", "content": chat_context_prompt},\n {"role": "user", "content": search_query_prompt(question)},\n ]\n )\n )["query"]\n\n return prompt_gpt(\n [\n {"role": "system", "content": chat_context_prompt},\n {"role": "system", "content": search_result_prompt(query)},\n {"role": "user", "content": question},\n ]\n )\n except Exception as e:\n raise e\n')),(0,a.kt)("h2",{id:"11---asking-our-chatbot-a-question"},"11 - Asking our chatbot a question"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'custom_chatbot("What did Luke Diaz buy?")\n')),(0,a.kt)("h2",{id:"12---a-quick-double-check"},"12 - A quick double check"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'display(\n continent_df.where(col("CustomerName") == "Luke Diaz")\n .select("Description")\n .distinct()\n)\n')))}u.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/1b7af47d.a15ef647.js b/assets/js/1b7af47d.ed1b0a2e.js similarity index 86% rename from assets/js/1b7af47d.a15ef647.js rename to assets/js/1b7af47d.ed1b0a2e.js index 7eced4c36a..2102eb826c 100644 --- a/assets/js/1b7af47d.a15ef647.js +++ b/assets/js/1b7af47d.ed1b0a2e.js @@ -1 +1 @@ -"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[14273],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>m});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),c=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=c(e.components);return r.createElement(l.Provider,{value:t},e.children)},p={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,u=s(e,["components","mdxType","originalType","parentName"]),d=c(n),m=a,f=d["".concat(l,".").concat(m)]||d[m]||p[m]||o;return n?r.createElement(f,i(i({ref:t},u),{},{components:n})):r.createElement(f,i({ref:t},u))}));function m(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=d;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s.mdxType="string"==typeof e?e:a,i[1]=s;for(var c=2;c{n.r(t),n.d(t,{assets:()=>l,contentTitle:()=>i,default:()=>p,frontMatter:()=>o,metadata:()=>s,toc:()=>c});var r=n(83117),a=(n(67294),n(3905));const o={title:"Quickstart - Create Audiobooks",hide_title:!0,status:"stable"},i="Create audiobooks using neural Text to speech",s={unversionedId:"Explore Algorithms/AI Services/Quickstart - Create Audiobooks",id:"Explore Algorithms/AI Services/Quickstart - Create Audiobooks",title:"Quickstart - Create Audiobooks",description:"Step 1: Load libraries and add service information",source:"@site/docs/Explore Algorithms/AI Services/Quickstart - Create Audiobooks.md",sourceDirName:"Explore Algorithms/AI Services",slug:"/Explore Algorithms/AI Services/Quickstart - Create Audiobooks",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Create Audiobooks",draft:!1,tags:[],version:"current",frontMatter:{title:"Quickstart - Create Audiobooks",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Quickstart - Create a Visual Search Engine",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Create a Visual Search Engine"},next:{title:"Quickstart - Document Question and Answering with PDFs",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs"}},l={},c=[{value:"Step 1: Load libraries and add service information",id:"step-1-load-libraries-and-add-service-information",level:2},{value:"Step 2: Attach the storage account to hold the audio files",id:"step-2-attach-the-storage-account-to-hold-the-audio-files",level:2},{value:"Step 3: Read in text data",id:"step-3-read-in-text-data",level:2},{value:"Step 4: Synthesize audio from text",id:"step-4-synthesize-audio-from-text",level:2},{value:"Step 5: Listen to an audio file",id:"step-5-listen-to-an-audio-file",level:2}],u={toc:c};function p(e){let{components:t,...n}=e;return(0,a.kt)("wrapper",(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("h1",{id:"create-audiobooks-using-neural-text-to-speech"},"Create audiobooks using neural Text to speech"),(0,a.kt)("h2",{id:"step-1-load-libraries-and-add-service-information"},"Step 1: Load libraries and add service information"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.core.platform import *\n\nif running_on_synapse():\n from notebookutils import mssparkutils\n\n# Fill this in with your Azure AI service information\nservice_key = find_secret(\n secret_name="ai-services-api-key", keyvault="mmlspark-build-keys"\n) # Replace this line with a string like service_key = "dddjnbdkw9329"\nservice_loc = "eastus"\n\nstorage_container = "audiobooks"\nstorage_key = find_secret(\n secret_name="madtest-storage-key", keyvault="mmlspark-build-keys"\n)\nstorage_account = "anomalydetectiontest"\n')),(0,a.kt)("h2",{id:"step-2-attach-the-storage-account-to-hold-the-audio-files"},"Step 2: Attach the storage account to hold the audio files"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'spark_key_setting = f"fs.azure.account.key.{storage_account}.blob.core.windows.net"\nspark.sparkContext._jsc.hadoopConfiguration().set(spark_key_setting, storage_key)\n')),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import os\nfrom os.path import exists, join\n\nmount_path = f"wasbs://{storage_container}@{storage_account}.blob.core.windows.net/"\nif running_on_synapse():\n mount_dir = join("/synfs", mssparkutils.env.getJobId(), storage_container)\n if not exists(mount_dir):\n mssparkutils.fs.mount(\n mount_path, f"/{storage_container}", {"accountKey": storage_key}\n )\nelif running_on_databricks():\n if not exists(f"/dbfs/mnt/{storage_container}"):\n dbutils.fs.mount(\n source=mount_path,\n mount_point=f"/mnt/{storage_container}",\n extra_configs={spark_key_setting: storage_key},\n )\n')),(0,a.kt)("h2",{id:"step-3-read-in-text-data"},"Step 3: Read in text data"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import udf\n\n\n@udf\ndef make_audio_filename(part):\n return f"wasbs://{storage_container}@{storage_account}.blob.core.windows.net/alice_in_wonderland/part_{part}.wav"\n\n\ndf = (\n spark.read.parquet(\n "wasbs://publicwasb@mmlspark.blob.core.windows.net/alice_in_wonderland.parquet"\n )\n .repartition(10)\n .withColumn("filename", make_audio_filename("part"))\n)\n\ndisplay(df)\n')),(0,a.kt)("h2",{id:"step-4-synthesize-audio-from-text"},"Step 4: Synthesize audio from text"),(0,a.kt)("div",null,(0,a.kt)("img",{src:"https://marhamilresearch4.blob.core.windows.net/gutenberg-public/Notebook/NeuralTTS_hero.jpeg",width:"500"})),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.speech import TextToSpeech\n\ntts = (\n TextToSpeech()\n .setSubscriptionKey(service_key)\n .setTextCol("text")\n .setLocation(service_loc)\n .setErrorCol("error")\n .setVoiceName("en-US-SteffanNeural")\n .setOutputFileCol("filename")\n)\n\naudio = tts.transform(df).cache()\ndisplay(audio)\n')),(0,a.kt)("h2",{id:"step-5-listen-to-an-audio-file"},"Step 5: Listen to an audio file"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from IPython.display import Audio\n\n\ndef get_audio_file(num):\n if running_on_databricks():\n return f"/dbfs/mnt/{storage_container}/alice_in_wonderland/part_{num}.wav"\n else:\n return join(mount_dir, f"alice_in_wonderland/part_{num}.wav")\n\n\nAudio(filename=get_audio_file(1))\n')))}p.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[14273],{3905:(e,t,n)=>{n.d(t,{Zo:()=>u,kt:()=>m});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),c=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},u=function(e){var t=c(e.components);return r.createElement(l.Provider,{value:t},e.children)},p={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},d=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,u=s(e,["components","mdxType","originalType","parentName"]),d=c(n),m=a,f=d["".concat(l,".").concat(m)]||d[m]||p[m]||o;return n?r.createElement(f,i(i({ref:t},u),{},{components:n})):r.createElement(f,i({ref:t},u))}));function m(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=d;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s.mdxType="string"==typeof e?e:a,i[1]=s;for(var c=2;c{n.r(t),n.d(t,{assets:()=>l,contentTitle:()=>i,default:()=>p,frontMatter:()=>o,metadata:()=>s,toc:()=>c});var r=n(83117),a=(n(67294),n(3905));const o={title:"Quickstart - Create Audiobooks",hide_title:!0,status:"stable"},i="Create audiobooks using neural Text to speech",s={unversionedId:"Explore Algorithms/AI Services/Quickstart - Create Audiobooks",id:"Explore Algorithms/AI Services/Quickstart - Create Audiobooks",title:"Quickstart - Create Audiobooks",description:"Step 1: Load libraries and add service information",source:"@site/docs/Explore Algorithms/AI Services/Quickstart - Create Audiobooks.md",sourceDirName:"Explore Algorithms/AI Services",slug:"/Explore Algorithms/AI Services/Quickstart - Create Audiobooks",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Create Audiobooks",draft:!1,tags:[],version:"current",frontMatter:{title:"Quickstart - Create Audiobooks",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Quickstart - Create a Visual Search Engine",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Create a Visual Search Engine"},next:{title:"Quickstart - Document Question and Answering with PDFs",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs"}},l={},c=[{value:"Step 1: Load libraries and add service information",id:"step-1-load-libraries-and-add-service-information",level:2},{value:"Step 2: Attach the storage account to hold the audio files",id:"step-2-attach-the-storage-account-to-hold-the-audio-files",level:2},{value:"Step 3: Read in text data",id:"step-3-read-in-text-data",level:2},{value:"Step 4: Synthesize audio from text",id:"step-4-synthesize-audio-from-text",level:2},{value:"Step 5: Listen to an audio file",id:"step-5-listen-to-an-audio-file",level:2}],u={toc:c};function p(e){let{components:t,...n}=e;return(0,a.kt)("wrapper",(0,r.Z)({},u,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("h1",{id:"create-audiobooks-using-neural-text-to-speech"},"Create audiobooks using neural Text to speech"),(0,a.kt)("h2",{id:"step-1-load-libraries-and-add-service-information"},"Step 1: Load libraries and add service information"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.core.platform import *\n\nif running_on_synapse():\n from notebookutils import mssparkutils\n\n# Fill this in with your Azure AI service information\nservice_key = find_secret(\n secret_name="ai-services-api-key", keyvault="mmlspark-build-keys"\n) # Replace this line with a string like service_key = "dddjnbdkw9329"\nservice_loc = "eastus"\n\nstorage_container = "audiobooks"\nstorage_key = find_secret(\n secret_name="madtest-storage-key", keyvault="mmlspark-build-keys"\n)\nstorage_account = "anomalydetectiontest"\n')),(0,a.kt)("h2",{id:"step-2-attach-the-storage-account-to-hold-the-audio-files"},"Step 2: Attach the storage account to hold the audio files"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'spark_key_setting = f"fs.azure.account.key.{storage_account}.blob.core.windows.net"\nspark.sparkContext._jsc.hadoopConfiguration().set(spark_key_setting, storage_key)\n')),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import os\nfrom os.path import exists, join\n\nmount_path = f"wasbs://{storage_container}@{storage_account}.blob.core.windows.net/"\nif running_on_synapse():\n mount_dir = join("/synfs", mssparkutils.env.getJobId(), storage_container)\n if not exists(mount_dir):\n mssparkutils.fs.mount(\n mount_path, f"/{storage_container}", {"accountKey": storage_key}\n )\nelif running_on_databricks():\n if not exists(f"/dbfs/mnt/{storage_container}"):\n dbutils.fs.mount(\n source=mount_path,\n mount_point=f"/mnt/{storage_container}",\n extra_configs={spark_key_setting: storage_key},\n )\n')),(0,a.kt)("h2",{id:"step-3-read-in-text-data"},"Step 3: Read in text data"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import udf\n\n\n@udf\ndef make_audio_filename(part):\n return f"wasbs://{storage_container}@{storage_account}.blob.core.windows.net/alice_in_wonderland/part_{part}.wav"\n\n\ndf = (\n spark.read.parquet(\n "wasbs://publicwasb@mmlspark.blob.core.windows.net/alice_in_wonderland.parquet"\n )\n .repartition(10)\n .withColumn("filename", make_audio_filename("part"))\n)\n\ndisplay(df)\n')),(0,a.kt)("h2",{id:"step-4-synthesize-audio-from-text"},"Step 4: Synthesize audio from text"),(0,a.kt)("div",null,(0,a.kt)("img",{src:"https://mmlspark.blob.core.windows.net/graphics/NeuralTTS_hero.jpeg",width:"500"})),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.speech import TextToSpeech\n\ntts = (\n TextToSpeech()\n .setSubscriptionKey(service_key)\n .setTextCol("text")\n .setLocation(service_loc)\n .setErrorCol("error")\n .setVoiceName("en-US-SteffanNeural")\n .setOutputFileCol("filename")\n)\n\naudio = tts.transform(df).cache()\ndisplay(audio)\n')),(0,a.kt)("h2",{id:"step-5-listen-to-an-audio-file"},"Step 5: Listen to an audio file"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from IPython.display import Audio\n\n\ndef get_audio_file(num):\n if running_on_databricks():\n return f"/dbfs/mnt/{storage_container}/alice_in_wonderland/part_{num}.wav"\n else:\n return join(mount_dir, f"alice_in_wonderland/part_{num}.wav")\n\n\nAudio(filename=get_audio_file(1))\n')))}p.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/52880d18.33813778.js b/assets/js/52880d18.e185638d.js similarity index 63% rename from assets/js/52880d18.33813778.js rename to assets/js/52880d18.e185638d.js index 15b97d3c25..d428bbcd9f 100644 --- a/assets/js/52880d18.33813778.js +++ b/assets/js/52880d18.e185638d.js @@ -1 +1 @@ -"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[62750],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>u});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),p=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},d=function(e){var t=p(e.components);return r.createElement(l.Provider,{value:t},e.children)},m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},c=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),c=p(n),u=a,h=c["".concat(l,".").concat(u)]||c[u]||m[u]||o;return n?r.createElement(h,i(i({ref:t},d),{},{components:n})):r.createElement(h,i({ref:t},d))}));function u(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=c;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s.mdxType="string"==typeof e?e:a,i[1]=s;for(var p=2;p{n.r(t),n.d(t,{assets:()=>l,contentTitle:()=>i,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>p});var r=n(83117),a=(n(67294),n(3905));const o={title:"Quickstart - OpenAI Embedding",hide_title:!0,status:"stable"},i="Embedding Text with Azure OpenAI",s={unversionedId:"Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding",id:"Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding",title:"Quickstart - OpenAI Embedding",description:"The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples we have integrated the Azure OpenAI service with the distributed machine learning library SynapseML. This integration makes it easy to use the Apache Spark distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models to generate embeddings for large datasets of text.",source:"@site/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding.md",sourceDirName:"Explore Algorithms/OpenAI",slug:"/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding",draft:!1,tags:[],version:"current",frontMatter:{title:"Quickstart - OpenAI Embedding",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"OpenAI",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/"},next:{title:"Quickstart - OpenAI Embedding and GPU based KNN",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN"}},l={},p=[{value:"Step 1: Prerequisites",id:"step-1-prerequisites",level:2},{value:"Step 2: Import this guide as a notebook",id:"step-2-import-this-guide-as-a-notebook",level:2},{value:"Step 3: Fill in your service information",id:"step-3-fill-in-your-service-information",level:2},{value:"Step 4: Load Data",id:"step-4-load-data",level:2},{value:"Step 5: Generate Embeddings",id:"step-5-generate-embeddings",level:2},{value:"Step 6: Reduce Embedding dimensionality for Visualization",id:"step-6-reduce-embedding-dimensionality-for-visualization",level:2},{value:"Step 7: Plot the embeddings",id:"step-7-plot-the-embeddings",level:2},{value:"Step 8: Build a fast vector index to over review embeddings",id:"step-8-build-a-fast-vector-index-to-over-review-embeddings",level:2},{value:"Step 8: Build the retrieval model pipeline",id:"step-8-build-the-retrieval-model-pipeline",level:2},{value:"Step 9: Retrieve results",id:"step-9-retrieve-results",level:2}],d={toc:p};function m(e){let{components:t,...n}=e;return(0,a.kt)("wrapper",(0,r.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("h1",{id:"embedding-text-with-azure-openai"},"Embedding Text with Azure OpenAI"),(0,a.kt)("p",null,"The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples we have integrated the Azure OpenAI service with the distributed machine learning library ",(0,a.kt)("a",{parentName:"p",href:"https://www.microsoft.com/en-us/research/blog/synapseml-a-simple-multilingual-and-massively-parallel-machine-learning-library/"},"SynapseML"),". This integration makes it easy to use the ",(0,a.kt)("a",{parentName:"p",href:"https://spark.apache.org/"},"Apache Spark")," distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models to generate embeddings for large datasets of text. "),(0,a.kt)("h2",{id:"step-1-prerequisites"},"Step 1: Prerequisites"),(0,a.kt)("p",null,"The key prerequisites for this quickstart include a working Azure OpenAI resource, and an Apache Spark cluster with SynapseML installed. We suggest creating a Synapse workspace, but an Azure Databricks, HDInsight, or Spark on Kubernetes, or even a python environment with the ",(0,a.kt)("inlineCode",{parentName:"p"},"pyspark")," package will work. "),(0,a.kt)("ol",null,(0,a.kt)("li",{parentName:"ol"},"An Azure OpenAI resource \u2013 request access ",(0,a.kt)("a",{parentName:"li",href:"https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu"},"here")," before ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource"},"creating a resource")),(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-create-workspace"},"Create a Synapse workspace")),(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-analyze-spark#create-a-serverless-apache-spark-pool"},"Create a serverless Apache Spark pool"))),(0,a.kt)("h2",{id:"step-2-import-this-guide-as-a-notebook"},"Step 2: Import this guide as a notebook"),(0,a.kt)("p",null,"The next step is to add this code into your Spark cluster. You can either create a notebook in your Spark platform and copy the code into this notebook to run the demo. Or download the notebook and import it into Synapse Analytics"),(0,a.kt)("ol",null,(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://github.com/microsoft/SynapseML/blob/master/notebooks/features/cognitive_services/CognitiveServices%20-%20OpenAI%20Embedding.ipynb"},"Download this demo as a notebook")," (click Raw, then save the file)"),(0,a.kt)("li",{parentName:"ol"},"Import the notebook ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-development-using-notebooks#create-a-notebook"},"into the Synapse Workspace")," or if using Databricks ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/databricks/notebooks/notebooks-manage#create-a-notebook"},"into the Databricks Workspace")),(0,a.kt)("li",{parentName:"ol"},"Install SynapseML on your cluster. Please see the installation instructions for Synapse at the bottom of ",(0,a.kt)("a",{parentName:"li",href:"https://microsoft.github.io/SynapseML/"},"the SynapseML website"),". Note that this requires pasting an additional cell at the top of the notebook you just imported"),(0,a.kt)("li",{parentName:"ol"},"Connect your notebook to a cluster and follow along, editing and rnnung the cells below.")),(0,a.kt)("h2",{id:"step-3-fill-in-your-service-information"},"Step 3: Fill in your service information"),(0,a.kt)("p",null,"Next, please edit the cell in the notebook to point to your service. In particular set the ",(0,a.kt)("inlineCode",{parentName:"p"},"service_name"),", ",(0,a.kt)("inlineCode",{parentName:"p"},"deployment_name"),", ",(0,a.kt)("inlineCode",{parentName:"p"},"location"),", and ",(0,a.kt)("inlineCode",{parentName:"p"},"key")," variables to match those for your OpenAI service:"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.core.platform import find_secret\n\n# Fill in the following lines with your service information\n# Learn more about selecting which embedding model to choose: https://openai.com/blog/new-and-improved-embedding-model\nservice_name = "synapseml-openai"\ndeployment_name_embeddings = "text-embedding-ada-002"\n\nkey = find_secret(\n secret_name="openai-api-key", keyvault="mmlspark-build-keys"\n) # please replace this with your key as a string\n\nassert key is not None and service_name is not None\n')),(0,a.kt)("h2",{id:"step-4-load-data"},"Step 4: Load Data"),(0,a.kt)("p",null,"In this demo we will explore a dataset of fine food reviews"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import pyspark.sql.functions as F\n\ndf = (\n spark.read.options(inferSchema="True", delimiter=",", header=True)\n .csv("wasbs://publicwasb@mmlspark.blob.core.windows.net/fine_food_reviews_1k.csv")\n .repartition(5)\n)\n\ndf = df.withColumn(\n "combined",\n F.format_string("Title: %s; Content: %s", F.trim(df.Summary), F.trim(df.Text)),\n)\n\ndisplay(df)\n')),(0,a.kt)("h2",{id:"step-5-generate-embeddings"},"Step 5: Generate Embeddings"),(0,a.kt)("p",null,"We will first generate embeddings for the reviews using the SynapseML OpenAIEmbedding client."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIEmbedding\n\nembedding = (\n OpenAIEmbedding()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name_embeddings)\n .setCustomServiceName(service_name)\n .setTextCol("combined")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\ncompleted_df = embedding.transform(df).cache()\ndisplay(completed_df)\n')),(0,a.kt)("h2",{id:"step-6-reduce-embedding-dimensionality-for-visualization"},"Step 6: Reduce Embedding dimensionality for Visualization"),(0,a.kt)("p",null,"We reduce the dimensionality to 2 dimensions using t-SNE decomposition."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import pandas as pd\nfrom sklearn.manifold import TSNE\nimport numpy as np\n\ncollected = list(completed_df.collect())\nmatrix = np.array([[r["embeddings"]] for r in collected])[:, 0, :].astype(np.float64)\nscores = np.array([[r["Score"]] for r in collected]).reshape(-1)\n\ntsne = TSNE(n_components=2, perplexity=15, random_state=42, init="pca")\nvis_dims = tsne.fit_transform(matrix)\nvis_dims.shape\n')),(0,a.kt)("h2",{id:"step-7-plot-the-embeddings"},"Step 7: Plot the embeddings"),(0,a.kt)("p",null,"We now use t-SNE to reduce the dimensionality of the embeddings from 1536 to 2. Once the embeddings are reduced to two dimensions, we can plot them in a 2D scatter plot. We colour each review by its star rating, ranging from red for negative reviews, to green for positive reviews. We can observe a decent data separation even in the reduced 2 dimensions."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import matplotlib.pyplot as plt\nimport matplotlib\nimport numpy as np\n\ncolors = ["red", "darkorange", "gold", "turquoise", "darkgreen"]\nx = [x for x, y in vis_dims]\ny = [y for x, y in vis_dims]\ncolor_indices = scores - 1\n\ncolormap = matplotlib.colors.ListedColormap(colors)\nplt.scatter(x, y, c=color_indices, cmap=colormap, alpha=0.3)\nfor score in [0, 1, 2, 3, 4]:\n avg_x = np.array(x)[scores - 1 == score].mean()\n avg_y = np.array(y)[scores - 1 == score].mean()\n color = colors[score]\n plt.scatter(avg_x, avg_y, marker="x", color=color, s=100)\n\nplt.title("Amazon ratings visualized in language using t-SNE")\n')),(0,a.kt)("h2",{id:"step-8-build-a-fast-vector-index-to-over-review-embeddings"},"Step 8: Build a fast vector index to over review embeddings"),(0,a.kt)("p",null,"We will use SynapseML's KNN estimator to build a fast cosine-similarity retrieval engine."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.nn import *\n\nknn = (\n KNN()\n .setFeaturesCol("embeddings")\n .setValuesCol("id")\n .setOutputCol("output")\n .setK(10)\n)\n\nknn_index = knn.fit(completed_df)\n')),(0,a.kt)("h2",{id:"step-8-build-the-retrieval-model-pipeline"},"Step 8: Build the retrieval model pipeline"),(0,a.kt)("p",null,'Note: The data types of the ID columns in the document and query dataframes should be the same. For some OpenAI models, users should use separate models for embedding documents and queries. These models are denoted by the "-doc" and "-query" suffixes respectively.'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.ml import PipelineModel\n\nembedding_query = (\n OpenAIEmbedding()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name_embeddings)\n .setCustomServiceName(service_name)\n .setTextCol("query")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\nretrieval_model = PipelineModel(stages=[embedding_query, knn_index])\n')),(0,a.kt)("h2",{id:"step-9-retrieve-results"},"Step 9: Retrieve results"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'query_df = (\n spark.createDataFrame(\n [\n (\n 0,\n "desserts",\n ),\n (\n 1,\n "disgusting",\n ),\n ]\n )\n .toDF("id", "query")\n .withColumn("id", F.col("id").cast("int"))\n)\n\n\ndf_matches = retrieval_model.transform(query_df).cache()\n\ndf_result = (\n df_matches.withColumn("match", F.explode("output"))\n .join(df, df["id"] == F.col("match.value"))\n .select("query", F.col("combined"), "match.distance")\n)\n\ndisplay(df_result)\n')))}m.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[62750],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>u});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),p=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},d=function(e){var t=p(e.components);return r.createElement(l.Provider,{value:t},e.children)},m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},c=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),c=p(n),u=a,h=c["".concat(l,".").concat(u)]||c[u]||m[u]||o;return n?r.createElement(h,i(i({ref:t},d),{},{components:n})):r.createElement(h,i({ref:t},d))}));function u(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=c;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s.mdxType="string"==typeof e?e:a,i[1]=s;for(var p=2;p{n.r(t),n.d(t,{assets:()=>l,contentTitle:()=>i,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>p});var r=n(83117),a=(n(67294),n(3905));const o={title:"Quickstart - OpenAI Embedding",hide_title:!0,status:"stable"},i="Embedding Text with Azure OpenAI",s={unversionedId:"Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding",id:"Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding",title:"Quickstart - OpenAI Embedding",description:"The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples we have integrated the Azure OpenAI service with the distributed machine learning library SynapseML. This integration makes it easy to use the Apache Spark distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models to generate embeddings for large datasets of text.",source:"@site/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding.md",sourceDirName:"Explore Algorithms/OpenAI",slug:"/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding",draft:!1,tags:[],version:"current",frontMatter:{title:"Quickstart - OpenAI Embedding",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"OpenAI",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/"},next:{title:"Quickstart - OpenAI Embedding and GPU based KNN",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN"}},l={},p=[{value:"Step 1: Prerequisites",id:"step-1-prerequisites",level:2},{value:"Step 2: Import this guide as a notebook",id:"step-2-import-this-guide-as-a-notebook",level:2},{value:"Step 3: Fill in your service information",id:"step-3-fill-in-your-service-information",level:2},{value:"Step 4: Load Data",id:"step-4-load-data",level:2},{value:"Step 5: Generate Embeddings",id:"step-5-generate-embeddings",level:2},{value:"Step 6: Reduce Embedding dimensionality for Visualization",id:"step-6-reduce-embedding-dimensionality-for-visualization",level:2},{value:"Step 7: Plot the embeddings",id:"step-7-plot-the-embeddings",level:2},{value:"Step 8: Build a fast vector index to over review embeddings",id:"step-8-build-a-fast-vector-index-to-over-review-embeddings",level:2},{value:"Step 8: Build the retrieval model pipeline",id:"step-8-build-the-retrieval-model-pipeline",level:2},{value:"Step 9: Retrieve results",id:"step-9-retrieve-results",level:2}],d={toc:p};function m(e){let{components:t,...n}=e;return(0,a.kt)("wrapper",(0,r.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("h1",{id:"embedding-text-with-azure-openai"},"Embedding Text with Azure OpenAI"),(0,a.kt)("p",null,"The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples we have integrated the Azure OpenAI service with the distributed machine learning library ",(0,a.kt)("a",{parentName:"p",href:"https://www.microsoft.com/en-us/research/blog/synapseml-a-simple-multilingual-and-massively-parallel-machine-learning-library/"},"SynapseML"),". This integration makes it easy to use the ",(0,a.kt)("a",{parentName:"p",href:"https://spark.apache.org/"},"Apache Spark")," distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models to generate embeddings for large datasets of text. "),(0,a.kt)("h2",{id:"step-1-prerequisites"},"Step 1: Prerequisites"),(0,a.kt)("p",null,"The key prerequisites for this quickstart include a working Azure OpenAI resource, and an Apache Spark cluster with SynapseML installed. We suggest creating a Synapse workspace, but an Azure Databricks, HDInsight, or Spark on Kubernetes, or even a python environment with the ",(0,a.kt)("inlineCode",{parentName:"p"},"pyspark")," package will work. "),(0,a.kt)("ol",null,(0,a.kt)("li",{parentName:"ol"},"An Azure OpenAI resource \u2013 request access ",(0,a.kt)("a",{parentName:"li",href:"https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu"},"here")," before ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource"},"creating a resource")),(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-create-workspace"},"Create a Synapse workspace")),(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-analyze-spark#create-a-serverless-apache-spark-pool"},"Create a serverless Apache Spark pool"))),(0,a.kt)("h2",{id:"step-2-import-this-guide-as-a-notebook"},"Step 2: Import this guide as a notebook"),(0,a.kt)("p",null,"The next step is to add this code into your Spark cluster. You can either create a notebook in your Spark platform and copy the code into this notebook to run the demo. Or download the notebook and import it into Synapse Analytics"),(0,a.kt)("ol",null,(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://github.com/microsoft/SynapseML/blob/master/notebooks/features/cognitive_services/CognitiveServices%20-%20OpenAI%20Embedding.ipynb"},"Download this demo as a notebook")," (click Raw, then save the file)"),(0,a.kt)("li",{parentName:"ol"},"Import the notebook ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-development-using-notebooks#create-a-notebook"},"into the Synapse Workspace")," or if using Databricks ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/databricks/notebooks/notebooks-manage#create-a-notebook"},"into the Databricks Workspace")),(0,a.kt)("li",{parentName:"ol"},"Install SynapseML on your cluster. Please see the installation instructions for Synapse at the bottom of ",(0,a.kt)("a",{parentName:"li",href:"https://microsoft.github.io/SynapseML/"},"the SynapseML website"),". Note that this requires pasting an additional cell at the top of the notebook you just imported"),(0,a.kt)("li",{parentName:"ol"},"Connect your notebook to a cluster and follow along, editing and rnnung the cells below.")),(0,a.kt)("h2",{id:"step-3-fill-in-your-service-information"},"Step 3: Fill in your service information"),(0,a.kt)("p",null,"Next, please edit the cell in the notebook to point to your service. In particular set the ",(0,a.kt)("inlineCode",{parentName:"p"},"service_name"),", ",(0,a.kt)("inlineCode",{parentName:"p"},"deployment_name"),", ",(0,a.kt)("inlineCode",{parentName:"p"},"location"),", and ",(0,a.kt)("inlineCode",{parentName:"p"},"key")," variables to match those for your OpenAI service:"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.core.platform import find_secret\n\n# Fill in the following lines with your service information\n# Learn more about selecting which embedding model to choose: https://openai.com/blog/new-and-improved-embedding-model\nservice_name = "synapseml-openai-2"\ndeployment_name_embeddings = "text-embedding-ada-002"\n\nkey = find_secret(\n secret_name="openai-api-key-2", keyvault="mmlspark-build-keys"\n) # please replace this with your key as a string\n\nassert key is not None and service_name is not None\n')),(0,a.kt)("h2",{id:"step-4-load-data"},"Step 4: Load Data"),(0,a.kt)("p",null,"In this demo we will explore a dataset of fine food reviews"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import pyspark.sql.functions as F\n\ndf = (\n spark.read.options(inferSchema="True", delimiter=",", header=True)\n .csv("wasbs://publicwasb@mmlspark.blob.core.windows.net/fine_food_reviews_1k.csv")\n .repartition(5)\n)\n\ndf = df.withColumn(\n "combined",\n F.format_string("Title: %s; Content: %s", F.trim(df.Summary), F.trim(df.Text)),\n)\n\ndisplay(df)\n')),(0,a.kt)("h2",{id:"step-5-generate-embeddings"},"Step 5: Generate Embeddings"),(0,a.kt)("p",null,"We will first generate embeddings for the reviews using the SynapseML OpenAIEmbedding client."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIEmbedding\n\nembedding = (\n OpenAIEmbedding()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name_embeddings)\n .setCustomServiceName(service_name)\n .setTextCol("combined")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\ncompleted_df = embedding.transform(df).cache()\ndisplay(completed_df)\n')),(0,a.kt)("h2",{id:"step-6-reduce-embedding-dimensionality-for-visualization"},"Step 6: Reduce Embedding dimensionality for Visualization"),(0,a.kt)("p",null,"We reduce the dimensionality to 2 dimensions using t-SNE decomposition."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import pandas as pd\nfrom sklearn.manifold import TSNE\nimport numpy as np\n\ncollected = list(completed_df.collect())\nmatrix = np.array([[r["embeddings"]] for r in collected])[:, 0, :].astype(np.float64)\nscores = np.array([[r["Score"]] for r in collected]).reshape(-1)\n\ntsne = TSNE(n_components=2, perplexity=15, random_state=42, init="pca")\nvis_dims = tsne.fit_transform(matrix)\nvis_dims.shape\n')),(0,a.kt)("h2",{id:"step-7-plot-the-embeddings"},"Step 7: Plot the embeddings"),(0,a.kt)("p",null,"We now use t-SNE to reduce the dimensionality of the embeddings from 1536 to 2. Once the embeddings are reduced to two dimensions, we can plot them in a 2D scatter plot. We colour each review by its star rating, ranging from red for negative reviews, to green for positive reviews. We can observe a decent data separation even in the reduced 2 dimensions."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import matplotlib.pyplot as plt\nimport matplotlib\nimport numpy as np\n\ncolors = ["red", "darkorange", "gold", "turquoise", "darkgreen"]\nx = [x for x, y in vis_dims]\ny = [y for x, y in vis_dims]\ncolor_indices = scores - 1\n\ncolormap = matplotlib.colors.ListedColormap(colors)\nplt.scatter(x, y, c=color_indices, cmap=colormap, alpha=0.3)\nfor score in [0, 1, 2, 3, 4]:\n avg_x = np.array(x)[scores - 1 == score].mean()\n avg_y = np.array(y)[scores - 1 == score].mean()\n color = colors[score]\n plt.scatter(avg_x, avg_y, marker="x", color=color, s=100)\n\nplt.title("Amazon ratings visualized in language using t-SNE")\n')),(0,a.kt)("h2",{id:"step-8-build-a-fast-vector-index-to-over-review-embeddings"},"Step 8: Build a fast vector index to over review embeddings"),(0,a.kt)("p",null,"We will use SynapseML's KNN estimator to build a fast cosine-similarity retrieval engine."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.nn import *\n\nknn = (\n KNN()\n .setFeaturesCol("embeddings")\n .setValuesCol("id")\n .setOutputCol("output")\n .setK(10)\n)\n\nknn_index = knn.fit(completed_df)\n')),(0,a.kt)("h2",{id:"step-8-build-the-retrieval-model-pipeline"},"Step 8: Build the retrieval model pipeline"),(0,a.kt)("p",null,'Note: The data types of the ID columns in the document and query dataframes should be the same. For some OpenAI models, users should use separate models for embedding documents and queries. These models are denoted by the "-doc" and "-query" suffixes respectively.'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.ml import PipelineModel\n\nembedding_query = (\n OpenAIEmbedding()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name_embeddings)\n .setCustomServiceName(service_name)\n .setTextCol("query")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\nretrieval_model = PipelineModel(stages=[embedding_query, knn_index])\n')),(0,a.kt)("h2",{id:"step-9-retrieve-results"},"Step 9: Retrieve results"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'query_df = (\n spark.createDataFrame(\n [\n (\n 0,\n "desserts",\n ),\n (\n 1,\n "disgusting",\n ),\n ]\n )\n .toDF("id", "query")\n .withColumn("id", F.col("id").cast("int"))\n)\n\n\ndf_matches = retrieval_model.transform(query_df).cache()\n\ndf_result = (\n df_matches.withColumn("match", F.explode("output"))\n .join(df, df["id"] == F.col("match.value"))\n .select("query", F.col("combined"), "match.distance")\n)\n\ndisplay(df_result)\n')))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/96cf5ff0.546126d5.js b/assets/js/96cf5ff0.546126d5.js new file mode 100644 index 0000000000..38e77b514f --- /dev/null +++ b/assets/js/96cf5ff0.546126d5.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[46135],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>u});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),l=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},m=function(e){var t=l(e.components);return a.createElement(p.Provider,{value:t},e.children)},c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,p=e.parentName,m=s(e,["components","mdxType","originalType","parentName"]),h=l(n),u=r,d=h["".concat(p,".").concat(u)]||h[u]||c[u]||o;return n?a.createElement(d,i(i({ref:t},m),{},{components:n})):a.createElement(d,i({ref:t},m))}));function u(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=h;var s={};for(var p in t)hasOwnProperty.call(t,p)&&(s[p]=t[p]);s.originalType=e,s.mdxType="string"==typeof e?e:r,i[1]=s;for(var l=2;l{n.r(t),n.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>l});var a=n(83117),r=(n(67294),n(3905));const o={title:"OpenAI",hide_title:!0,status:"stable"},i="Azure OpenAI for big data",s={unversionedId:"Explore Algorithms/OpenAI/OpenAI",id:"Explore Algorithms/OpenAI/OpenAI",title:"OpenAI",description:"The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples, we have integrated the Azure OpenAI service with the distributed machine learning library SynapseML. This integration makes it easy to use the Apache Spark distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models at a distributed scale using Azure OpenAI.",source:"@site/docs/Explore Algorithms/OpenAI/OpenAI.md",sourceDirName:"Explore Algorithms/OpenAI",slug:"/Explore Algorithms/OpenAI/",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/",draft:!1,tags:[],version:"current",frontMatter:{title:"OpenAI",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Langchain",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Langchain"},next:{title:"Quickstart - OpenAI Embedding",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding"}},p={},l=[{value:"Prerequisites",id:"prerequisites",level:2},{value:"Import this guide as a notebook",id:"import-this-guide-as-a-notebook",level:2},{value:"Fill in service information",id:"fill-in-service-information",level:2},{value:"Create a dataset of prompts",id:"create-a-dataset-of-prompts",level:2},{value:"Create the OpenAICompletion Apache Spark Client",id:"create-the-openaicompletion-apache-spark-client",level:2},{value:"Transform the dataframe with the OpenAICompletion Client",id:"transform-the-dataframe-with-the-openaicompletion-client",level:2},{value:"More Usage Examples",id:"more-usage-examples",level:2},{value:"Generating Text Embeddings",id:"generating-text-embeddings",level:3},{value:"Chat Completion",id:"chat-completion",level:3},{value:"Improve throughput with request batching",id:"improve-throughput-with-request-batching",level:3},{value:"Using an automatic minibatcher",id:"using-an-automatic-minibatcher",level:3},{value:"Prompt engineering for translation",id:"prompt-engineering-for-translation",level:3},{value:"Prompt for question answering",id:"prompt-for-question-answering",level:3}],m={toc:l};function c(e){let{components:t,...n}=e;return(0,r.kt)("wrapper",(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h1",{id:"azure-openai-for-big-data"},"Azure OpenAI for big data"),(0,r.kt)("p",null,"The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples, we have integrated the Azure OpenAI service with the distributed machine learning library ",(0,r.kt)("a",{parentName:"p",href:"https://www.microsoft.com/en-us/research/blog/synapseml-a-simple-multilingual-and-massively-parallel-machine-learning-library/"},"SynapseML"),". This integration makes it easy to use the ",(0,r.kt)("a",{parentName:"p",href:"https://spark.apache.org/"},"Apache Spark")," distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models at a distributed scale using Azure OpenAI. "),(0,r.kt)("h2",{id:"prerequisites"},"Prerequisites"),(0,r.kt)("p",null,"The key prerequisites for this quickstart include a working Azure OpenAI resource, and an Apache Spark cluster with SynapseML installed. We suggest creating a Synapse workspace, but an Azure Databricks, HDInsight, or Spark on Kubernetes, or even a python environment with the ",(0,r.kt)("inlineCode",{parentName:"p"},"pyspark")," package will work. "),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"An Azure OpenAI resource \u2013 request access ",(0,r.kt)("a",{parentName:"li",href:"https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu"},"here")," before ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource"},"creating a resource")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-create-workspace"},"Create a Synapse workspace")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-analyze-spark#create-a-serverless-apache-spark-pool"},"Create a serverless Apache Spark pool"))),(0,r.kt)("h2",{id:"import-this-guide-as-a-notebook"},"Import this guide as a notebook"),(0,r.kt)("p",null,"The next step is to add this code into your Spark cluster. You can either create a notebook in your Spark platform and copy the code into this notebook to run the demo. Or download the notebook and import it into Synapse Analytics"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("a",{parentName:"li",href:"https://github.com/microsoft/SynapseML/blob/master/docs/Explore%20Algorithms/OpenAI/OpenAI.ipynb"},"Download this demo as a notebook")," (select ",(0,r.kt)("strong",{parentName:"li"},"Raw"),", then save the file)"),(0,r.kt)("li",{parentName:"ul"},"Import the notebook. ",(0,r.kt)("ul",{parentName:"li"},(0,r.kt)("li",{parentName:"ul"},"If you are using Synapse Analytics ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-development-using-notebooks#create-a-notebook"},"into the Synapse Workspace")," "),(0,r.kt)("li",{parentName:"ul"},"If your are using Databricks ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/databricks/notebooks/notebooks-manage#create-a-notebook"},"import into the Databricks Workspace"),". "),(0,r.kt)("li",{parentName:"ul"},"If you are using Fabric ",(0,r.kt)("a",{parentName:"li",href:"https://learn.microsoft.com/en-us/fabric/data-engineering/how-to-use-notebook"},"import into the Fabric Workspace")))),(0,r.kt)("li",{parentName:"ul"},"Install SynapseML on your cluster. See the installation instructions for Synapse at the bottom of ",(0,r.kt)("a",{parentName:"li",href:"https://microsoft.github.io/SynapseML/"},"the SynapseML website"),". ",(0,r.kt)("ul",{parentName:"li"},(0,r.kt)("li",{parentName:"ul"},"If you are using Fabric, please check ",(0,r.kt)("a",{parentName:"li",href:"https://learn.microsoft.com/en-us/fabric/data-science/install-synapseml"},"Installation Guide"),". This requires pasting an extra cell at the top of the notebook you imported. "))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre"},"Connect your notebook to a cluster and follow along, editing and running the cells.\n")))),(0,r.kt)("h2",{id:"fill-in-service-information"},"Fill in service information"),(0,r.kt)("p",null,"Next, edit the cell in the notebook to point to your service. In particular set the ",(0,r.kt)("inlineCode",{parentName:"p"},"service_name"),", ",(0,r.kt)("inlineCode",{parentName:"p"},"deployment_name"),", ",(0,r.kt)("inlineCode",{parentName:"p"},"location"),", and ",(0,r.kt)("inlineCode",{parentName:"p"},"key")," variables to match them to your OpenAI service:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.core.platform import find_secret\n\n# Fill in the following lines with your service information\n# Learn more about selecting which embedding model to choose: https://openai.com/blog/new-and-improved-embedding-model\nservice_name = "synapseml-openai-2"\ndeployment_name = "gpt-35-turbo"\ndeployment_name_embeddings = "text-embedding-ada-002"\n\nkey = find_secret(\n secret_name="openai-api-key-2", keyvault="mmlspark-build-keys"\n) # please replace this line with your key as a string\n\nassert key is not None and service_name is not None\n')),(0,r.kt)("h2",{id:"create-a-dataset-of-prompts"},"Create a dataset of prompts"),(0,r.kt)("p",null,"Next, create a dataframe consisting of a series of rows, with one prompt per row. "),(0,r.kt)("p",null,"You can also load data directly from ADLS or other databases. For more information on loading and preparing Spark dataframes, see the ",(0,r.kt)("a",{parentName:"p",href:"https://spark.apache.org/docs/latest/sql-data-sources.html"},"Apache Spark data loading guide"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'df = spark.createDataFrame(\n [\n ("Hello my name is",),\n ("The best code is code thats",),\n ("SynapseML is ",),\n ]\n).toDF("prompt")\n')),(0,r.kt)("h2",{id:"create-the-openaicompletion-apache-spark-client"},"Create the OpenAICompletion Apache Spark Client"),(0,r.kt)("p",null,"To apply the OpenAI Completion service to your dataframe you created, create an OpenAICompletion object, which serves as a distributed client. Parameters of the service can be set either with a single value, or by a column of the dataframe with the appropriate setters on the ",(0,r.kt)("inlineCode",{parentName:"p"},"OpenAICompletion")," object. Here we're setting ",(0,r.kt)("inlineCode",{parentName:"p"},"maxTokens")," to 200. A token is around four characters, and this limit applies to the sum of the prompt and the result. We're also setting the ",(0,r.kt)("inlineCode",{parentName:"p"},"promptCol")," parameter with the name of the prompt column in the dataframe."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAICompletion\n\ncompletion = (\n OpenAICompletion()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name)\n .setCustomServiceName(service_name)\n .setMaxTokens(200)\n .setPromptCol("prompt")\n .setErrorCol("error")\n .setOutputCol("completions")\n)\n')),(0,r.kt)("h2",{id:"transform-the-dataframe-with-the-openaicompletion-client"},"Transform the dataframe with the OpenAICompletion Client"),(0,r.kt)("p",null,"After creating the dataframe and the completion client, you can transform your input dataset and add a column called ",(0,r.kt)("inlineCode",{parentName:"p"},"completions")," with all of the information the service adds. Select just the text for simplicity."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import col\n\ncompleted_df = completion.transform(df).cache()\ndisplay(\n completed_df.select(\n col("prompt"),\n col("error"),\n col("completions.choices.text").getItem(0).alias("text"),\n )\n)\n')),(0,r.kt)("p",null,"Your output should look something like this. The completion text will be different from the sample."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"th"},"prompt")),(0,r.kt)("th",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"th"},"error")),(0,r.kt)("th",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"th"},"text")))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"Hello my name is"),(0,r.kt)("td",{parentName:"tr",align:"center"},"null"),(0,r.kt)("td",{parentName:"tr",align:"center"},"Makaveli I'm eighteen years old and I want to be a rapper when I grow up I love writing and making music I'm from Los Angeles, CA")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"The best code is code thats"),(0,r.kt)("td",{parentName:"tr",align:"center"},"null"),(0,r.kt)("td",{parentName:"tr",align:"center"},"understandable This is a subjective statement, and there is no definitive answer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"SynapseML is"),(0,r.kt)("td",{parentName:"tr",align:"center"},"null"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A machine learning algorithm that is able to learn how to predict the future outcome of events.")))),(0,r.kt)("h2",{id:"more-usage-examples"},"More Usage Examples"),(0,r.kt)("h3",{id:"generating-text-embeddings"},"Generating Text Embeddings"),(0,r.kt)("p",null,"In addition to completing text, we can also embed text for use in downstream algorithms or vector retrieval architectures. Creating embeddings allows you to search and retrieve documents from large collections and can be used when prompt engineering isn't sufficient for the task. For more information on using ",(0,r.kt)("inlineCode",{parentName:"p"},"OpenAIEmbedding"),", see our ",(0,r.kt)("a",{parentName:"p",href:"./Quickstart%20-%20OpenAI%20Embedding"},"embedding guide"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIEmbedding\n\nembedding = (\n OpenAIEmbedding()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name_embeddings)\n .setCustomServiceName(service_name)\n .setTextCol("prompt")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\ndisplay(embedding.transform(df))\n')),(0,r.kt)("h3",{id:"chat-completion"},"Chat Completion"),(0,r.kt)("p",null,"Models such as ChatGPT and GPT-4 are capable of understanding chats instead of single prompts. The ",(0,r.kt)("inlineCode",{parentName:"p"},"OpenAIChatCompletion")," transformer exposes this functionality at scale."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIChatCompletion\nfrom pyspark.sql import Row\nfrom pyspark.sql.types import *\n\n\ndef make_message(role, content):\n return Row(role=role, content=content, name=role)\n\n\nchat_df = spark.createDataFrame(\n [\n (\n [\n make_message(\n "system", "You are an AI chatbot with red as your favorite color"\n ),\n make_message("user", "Whats your favorite color"),\n ],\n ),\n (\n [\n make_message("system", "You are very excited"),\n make_message("user", "How are you today"),\n ],\n ),\n ]\n).toDF("messages")\n\n\nchat_completion = (\n OpenAIChatCompletion()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name)\n .setCustomServiceName(service_name)\n .setMessagesCol("messages")\n .setErrorCol("error")\n .setOutputCol("chat_completions")\n)\n\ndisplay(\n chat_completion.transform(chat_df).select(\n "messages", "chat_completions.choices.message.content"\n )\n)\n')),(0,r.kt)("h3",{id:"improve-throughput-with-request-batching"},"Improve throughput with request batching"),(0,r.kt)("p",null,'The example makes several requests to the service, one for each prompt. To complete multiple prompts in a single request, use batch mode. First, in the OpenAICompletion object, instead of setting the Prompt column to "Prompt", specify "batchPrompt" for the BatchPrompt column.\nTo do so, create a dataframe with a list of prompts per row.'),(0,r.kt)("p",null,'As of this writing there\'s currently a limit of 20 prompts in a single request, and a hard limit of 2048 "tokens", or approximately 1500 words.'),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},"")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'batch_df = spark.createDataFrame(\n [\n (["The time has come", "Pleased to", "Today stocks", "Here\'s to"],),\n (["The only thing", "Ask not what", "Every litter", "I am"],),\n ]\n).toDF("batchPrompt")\n')),(0,r.kt)("p",null,"Next we create the OpenAICompletion object. Rather than setting the prompt column, set the batchPrompt column if your column is of type ",(0,r.kt)("inlineCode",{parentName:"p"},"Array[String]"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'batch_completion = (\n OpenAICompletion()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name)\n .setCustomServiceName(service_name)\n .setMaxTokens(200)\n .setBatchPromptCol("batchPrompt")\n .setErrorCol("error")\n .setOutputCol("completions")\n)\n')),(0,r.kt)("p",null,"In the call to transform, a request will be made per row. Since there are multiple prompts in a single row, each request is sent with all prompts in that row. The results contain a row for each row in the request."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},"completed_batch_df = batch_completion.transform(batch_df).cache()\ndisplay(completed_batch_df)\n")),(0,r.kt)("h3",{id:"using-an-automatic-minibatcher"},"Using an automatic minibatcher"),(0,r.kt)("p",null,"If your data is in column format, you can transpose it to row format using SynapseML's ",(0,r.kt)("inlineCode",{parentName:"p"},"FixedMiniBatcherTransformer"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.types import StringType\nfrom synapse.ml.stages import FixedMiniBatchTransformer\nfrom synapse.ml.core.spark import FluentAPI\n\ncompleted_autobatch_df = (\n df.coalesce(\n 1\n ) # Force a single partition so that our little 4-row dataframe makes a batch of size 4, you can remove this step for large datasets\n .mlTransform(FixedMiniBatchTransformer(batchSize=4))\n .withColumnRenamed("prompt", "batchPrompt")\n .mlTransform(batch_completion)\n)\n\ndisplay(completed_autobatch_df)\n')),(0,r.kt)("h3",{id:"prompt-engineering-for-translation"},"Prompt engineering for translation"),(0,r.kt)("p",null,"The Azure OpenAI service can solve many different natural language tasks through ",(0,r.kt)("a",{parentName:"p",href:"https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/completions"},"prompt engineering"),". Here, we show an example of prompting for language translation:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'translate_df = spark.createDataFrame(\n [\n ("Japanese: Ookina hako \\nEnglish: Big box \\nJapanese: Midori tako\\nEnglish:",),\n (\n "French: Quel heure et il au Montreal? \\nEnglish: What time is it in Montreal? \\nFrench: Ou est le poulet? \\nEnglish:",\n ),\n ]\n).toDF("prompt")\n\ndisplay(completion.transform(translate_df))\n')),(0,r.kt)("h3",{id:"prompt-for-question-answering"},"Prompt for question answering"),(0,r.kt)("p",null,"Here, we prompt GPT-3 for general-knowledge question answering:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'qa_df = spark.createDataFrame(\n [\n (\n "Q: Where is the Grand Canyon?\\nA: The Grand Canyon is in Arizona.\\n\\nQ: What is the weight of the Burj Khalifa in kilograms?\\nA:",\n )\n ]\n).toDF("prompt")\n\ndisplay(completion.transform(qa_df))\n')))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/96cf5ff0.b492f885.js b/assets/js/96cf5ff0.b492f885.js deleted file mode 100644 index 9e509eb1b0..0000000000 --- a/assets/js/96cf5ff0.b492f885.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[46135],{3905:(e,t,n)=>{n.d(t,{Zo:()=>m,kt:()=>u});var a=n(67294);function r(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function i(e){for(var t=1;t=0||(r[n]=e[n]);return r}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var p=a.createContext({}),l=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},m=function(e){var t=l(e.components);return a.createElement(p.Provider,{value:t},e.children)},c={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},h=a.forwardRef((function(e,t){var n=e.components,r=e.mdxType,o=e.originalType,p=e.parentName,m=s(e,["components","mdxType","originalType","parentName"]),h=l(n),u=r,d=h["".concat(p,".").concat(u)]||h[u]||c[u]||o;return n?a.createElement(d,i(i({ref:t},m),{},{components:n})):a.createElement(d,i({ref:t},m))}));function u(e,t){var n=arguments,r=t&&t.mdxType;if("string"==typeof e||r){var o=n.length,i=new Array(o);i[0]=h;var s={};for(var p in t)hasOwnProperty.call(t,p)&&(s[p]=t[p]);s.originalType=e,s.mdxType="string"==typeof e?e:r,i[1]=s;for(var l=2;l{n.r(t),n.d(t,{assets:()=>p,contentTitle:()=>i,default:()=>c,frontMatter:()=>o,metadata:()=>s,toc:()=>l});var a=n(83117),r=(n(67294),n(3905));const o={title:"OpenAI",hide_title:!0,status:"stable"},i="Azure OpenAI for big data",s={unversionedId:"Explore Algorithms/OpenAI/OpenAI",id:"Explore Algorithms/OpenAI/OpenAI",title:"OpenAI",description:"The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples, we have integrated the Azure OpenAI service with the distributed machine learning library SynapseML. This integration makes it easy to use the Apache Spark distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models at a distributed scale using Azure OpenAI.",source:"@site/docs/Explore Algorithms/OpenAI/OpenAI.md",sourceDirName:"Explore Algorithms/OpenAI",slug:"/Explore Algorithms/OpenAI/",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/",draft:!1,tags:[],version:"current",frontMatter:{title:"OpenAI",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Langchain",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Langchain"},next:{title:"Quickstart - OpenAI Embedding",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding"}},p={},l=[{value:"Prerequisites",id:"prerequisites",level:2},{value:"Import this guide as a notebook",id:"import-this-guide-as-a-notebook",level:2},{value:"Fill in service information",id:"fill-in-service-information",level:2},{value:"Create a dataset of prompts",id:"create-a-dataset-of-prompts",level:2},{value:"Create the OpenAICompletion Apache Spark Client",id:"create-the-openaicompletion-apache-spark-client",level:2},{value:"Transform the dataframe with the OpenAICompletion Client",id:"transform-the-dataframe-with-the-openaicompletion-client",level:2},{value:"More Usage Examples",id:"more-usage-examples",level:2},{value:"Generating Text Embeddings",id:"generating-text-embeddings",level:3},{value:"Chat Completion",id:"chat-completion",level:3},{value:"Improve throughput with request batching",id:"improve-throughput-with-request-batching",level:3},{value:"Using an automatic minibatcher",id:"using-an-automatic-minibatcher",level:3},{value:"Prompt engineering for translation",id:"prompt-engineering-for-translation",level:3},{value:"Prompt for question answering",id:"prompt-for-question-answering",level:3}],m={toc:l};function c(e){let{components:t,...n}=e;return(0,r.kt)("wrapper",(0,a.Z)({},m,n,{components:t,mdxType:"MDXLayout"}),(0,r.kt)("h1",{id:"azure-openai-for-big-data"},"Azure OpenAI for big data"),(0,r.kt)("p",null,"The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples, we have integrated the Azure OpenAI service with the distributed machine learning library ",(0,r.kt)("a",{parentName:"p",href:"https://www.microsoft.com/en-us/research/blog/synapseml-a-simple-multilingual-and-massively-parallel-machine-learning-library/"},"SynapseML"),". This integration makes it easy to use the ",(0,r.kt)("a",{parentName:"p",href:"https://spark.apache.org/"},"Apache Spark")," distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models at a distributed scale using Azure OpenAI. "),(0,r.kt)("h2",{id:"prerequisites"},"Prerequisites"),(0,r.kt)("p",null,"The key prerequisites for this quickstart include a working Azure OpenAI resource, and an Apache Spark cluster with SynapseML installed. We suggest creating a Synapse workspace, but an Azure Databricks, HDInsight, or Spark on Kubernetes, or even a python environment with the ",(0,r.kt)("inlineCode",{parentName:"p"},"pyspark")," package will work. "),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"An Azure OpenAI resource \u2013 request access ",(0,r.kt)("a",{parentName:"li",href:"https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu"},"here")," before ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource"},"creating a resource")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-create-workspace"},"Create a Synapse workspace")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-analyze-spark#create-a-serverless-apache-spark-pool"},"Create a serverless Apache Spark pool"))),(0,r.kt)("h2",{id:"import-this-guide-as-a-notebook"},"Import this guide as a notebook"),(0,r.kt)("p",null,"The next step is to add this code into your Spark cluster. You can either create a notebook in your Spark platform and copy the code into this notebook to run the demo. Or download the notebook and import it into Synapse Analytics"),(0,r.kt)("ul",null,(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("a",{parentName:"li",href:"https://github.com/microsoft/SynapseML/blob/master/docs/Explore%20Algorithms/OpenAI/OpenAI.ipynb"},"Download this demo as a notebook")," (select ",(0,r.kt)("strong",{parentName:"li"},"Raw"),", then save the file)"),(0,r.kt)("li",{parentName:"ul"},"Import the notebook. ",(0,r.kt)("ul",{parentName:"li"},(0,r.kt)("li",{parentName:"ul"},"If you are using Synapse Analytics ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-development-using-notebooks#create-a-notebook"},"into the Synapse Workspace")," "),(0,r.kt)("li",{parentName:"ul"},"If your are using Databricks ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/databricks/notebooks/notebooks-manage#create-a-notebook"},"import into the Databricks Workspace"),". "),(0,r.kt)("li",{parentName:"ul"},"If you are using Fabric ",(0,r.kt)("a",{parentName:"li",href:"https://learn.microsoft.com/en-us/fabric/data-engineering/how-to-use-notebook"},"import into the Fabric Workspace")))),(0,r.kt)("li",{parentName:"ul"},"Install SynapseML on your cluster. See the installation instructions for Synapse at the bottom of ",(0,r.kt)("a",{parentName:"li",href:"https://microsoft.github.io/SynapseML/"},"the SynapseML website"),". ",(0,r.kt)("ul",{parentName:"li"},(0,r.kt)("li",{parentName:"ul"},"If you are using Fabric, please check ",(0,r.kt)("a",{parentName:"li",href:"https://learn.microsoft.com/en-us/fabric/data-science/install-synapseml"},"Installation Guide"),". This requires pasting an extra cell at the top of the notebook you imported. "))),(0,r.kt)("li",{parentName:"ul"},(0,r.kt)("pre",{parentName:"li"},(0,r.kt)("code",{parentName:"pre"},"Connect your notebook to a cluster and follow along, editing and running the cells.\n")))),(0,r.kt)("h2",{id:"fill-in-service-information"},"Fill in service information"),(0,r.kt)("p",null,"Next, edit the cell in the notebook to point to your service. In particular set the ",(0,r.kt)("inlineCode",{parentName:"p"},"service_name"),", ",(0,r.kt)("inlineCode",{parentName:"p"},"deployment_name"),", ",(0,r.kt)("inlineCode",{parentName:"p"},"location"),", and ",(0,r.kt)("inlineCode",{parentName:"p"},"key")," variables to match them to your OpenAI service:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.core.platform import find_secret\n\n# Fill in the following lines with your service information\n# Learn more about selecting which embedding model to choose: https://openai.com/blog/new-and-improved-embedding-model\nservice_name = "synapseml-openai"\ndeployment_name = "gpt-35-turbo"\ndeployment_name_embeddings = "text-embedding-ada-002"\n\nkey = find_secret(\n secret_name="openai-api-key", keyvault="mmlspark-build-keys"\n) # please replace this line with your key as a string\n\nassert key is not None and service_name is not None\n')),(0,r.kt)("h2",{id:"create-a-dataset-of-prompts"},"Create a dataset of prompts"),(0,r.kt)("p",null,"Next, create a dataframe consisting of a series of rows, with one prompt per row. "),(0,r.kt)("p",null,"You can also load data directly from ADLS or other databases. For more information on loading and preparing Spark dataframes, see the ",(0,r.kt)("a",{parentName:"p",href:"https://spark.apache.org/docs/latest/sql-data-sources.html"},"Apache Spark data loading guide"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'df = spark.createDataFrame(\n [\n ("Hello my name is",),\n ("The best code is code thats",),\n ("SynapseML is ",),\n ]\n).toDF("prompt")\n')),(0,r.kt)("h2",{id:"create-the-openaicompletion-apache-spark-client"},"Create the OpenAICompletion Apache Spark Client"),(0,r.kt)("p",null,"To apply the OpenAI Completion service to your dataframe you created, create an OpenAICompletion object, which serves as a distributed client. Parameters of the service can be set either with a single value, or by a column of the dataframe with the appropriate setters on the ",(0,r.kt)("inlineCode",{parentName:"p"},"OpenAICompletion")," object. Here we're setting ",(0,r.kt)("inlineCode",{parentName:"p"},"maxTokens")," to 200. A token is around four characters, and this limit applies to the sum of the prompt and the result. We're also setting the ",(0,r.kt)("inlineCode",{parentName:"p"},"promptCol")," parameter with the name of the prompt column in the dataframe."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAICompletion\n\ncompletion = (\n OpenAICompletion()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name)\n .setCustomServiceName(service_name)\n .setMaxTokens(200)\n .setPromptCol("prompt")\n .setErrorCol("error")\n .setOutputCol("completions")\n)\n')),(0,r.kt)("h2",{id:"transform-the-dataframe-with-the-openaicompletion-client"},"Transform the dataframe with the OpenAICompletion Client"),(0,r.kt)("p",null,"After creating the dataframe and the completion client, you can transform your input dataset and add a column called ",(0,r.kt)("inlineCode",{parentName:"p"},"completions")," with all of the information the service adds. Select just the text for simplicity."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import col\n\ncompleted_df = completion.transform(df).cache()\ndisplay(\n completed_df.select(\n col("prompt"),\n col("error"),\n col("completions.choices.text").getItem(0).alias("text"),\n )\n)\n')),(0,r.kt)("p",null,"Your output should look something like this. The completion text will be different from the sample."),(0,r.kt)("table",null,(0,r.kt)("thead",{parentName:"table"},(0,r.kt)("tr",{parentName:"thead"},(0,r.kt)("th",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"th"},"prompt")),(0,r.kt)("th",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"th"},"error")),(0,r.kt)("th",{parentName:"tr",align:"center"},(0,r.kt)("strong",{parentName:"th"},"text")))),(0,r.kt)("tbody",{parentName:"table"},(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"Hello my name is"),(0,r.kt)("td",{parentName:"tr",align:"center"},"null"),(0,r.kt)("td",{parentName:"tr",align:"center"},"Makaveli I'm eighteen years old and I want to be a rapper when I grow up I love writing and making music I'm from Los Angeles, CA")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"The best code is code thats"),(0,r.kt)("td",{parentName:"tr",align:"center"},"null"),(0,r.kt)("td",{parentName:"tr",align:"center"},"understandable This is a subjective statement, and there is no definitive answer.")),(0,r.kt)("tr",{parentName:"tbody"},(0,r.kt)("td",{parentName:"tr",align:"center"},"SynapseML is"),(0,r.kt)("td",{parentName:"tr",align:"center"},"null"),(0,r.kt)("td",{parentName:"tr",align:"center"},"A machine learning algorithm that is able to learn how to predict the future outcome of events.")))),(0,r.kt)("h2",{id:"more-usage-examples"},"More Usage Examples"),(0,r.kt)("h3",{id:"generating-text-embeddings"},"Generating Text Embeddings"),(0,r.kt)("p",null,"In addition to completing text, we can also embed text for use in downstream algorithms or vector retrieval architectures. Creating embeddings allows you to search and retrieve documents from large collections and can be used when prompt engineering isn't sufficient for the task. For more information on using ",(0,r.kt)("inlineCode",{parentName:"p"},"OpenAIEmbedding"),", see our ",(0,r.kt)("a",{parentName:"p",href:"./Quickstart%20-%20OpenAI%20Embedding"},"embedding guide"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIEmbedding\n\nembedding = (\n OpenAIEmbedding()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name_embeddings)\n .setCustomServiceName(service_name)\n .setTextCol("prompt")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\ndisplay(embedding.transform(df))\n')),(0,r.kt)("h3",{id:"chat-completion"},"Chat Completion"),(0,r.kt)("p",null,"Models such as ChatGPT and GPT-4 are capable of understanding chats instead of single prompts. The ",(0,r.kt)("inlineCode",{parentName:"p"},"OpenAIChatCompletion")," transformer exposes this functionality at scale."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIChatCompletion\nfrom pyspark.sql import Row\nfrom pyspark.sql.types import *\n\n\ndef make_message(role, content):\n return Row(role=role, content=content, name=role)\n\n\nchat_df = spark.createDataFrame(\n [\n (\n [\n make_message(\n "system", "You are an AI chatbot with red as your favorite color"\n ),\n make_message("user", "Whats your favorite color"),\n ],\n ),\n (\n [\n make_message("system", "You are very excited"),\n make_message("user", "How are you today"),\n ],\n ),\n ]\n).toDF("messages")\n\n\nchat_completion = (\n OpenAIChatCompletion()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name)\n .setCustomServiceName(service_name)\n .setMessagesCol("messages")\n .setErrorCol("error")\n .setOutputCol("chat_completions")\n)\n\ndisplay(\n chat_completion.transform(chat_df).select(\n "messages", "chat_completions.choices.message.content"\n )\n)\n')),(0,r.kt)("h3",{id:"improve-throughput-with-request-batching"},"Improve throughput with request batching"),(0,r.kt)("p",null,'The example makes several requests to the service, one for each prompt. To complete multiple prompts in a single request, use batch mode. First, in the OpenAICompletion object, instead of setting the Prompt column to "Prompt", specify "batchPrompt" for the BatchPrompt column.\nTo do so, create a dataframe with a list of prompts per row.'),(0,r.kt)("p",null,'As of this writing there\'s currently a limit of 20 prompts in a single request, and a hard limit of 2048 "tokens", or approximately 1500 words.'),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},"")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'batch_df = spark.createDataFrame(\n [\n (["The time has come", "Pleased to", "Today stocks", "Here\'s to"],),\n (["The only thing", "Ask not what", "Every litter", "I am"],),\n ]\n).toDF("batchPrompt")\n')),(0,r.kt)("p",null,"Next we create the OpenAICompletion object. Rather than setting the prompt column, set the batchPrompt column if your column is of type ",(0,r.kt)("inlineCode",{parentName:"p"},"Array[String]"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'batch_completion = (\n OpenAICompletion()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name)\n .setCustomServiceName(service_name)\n .setMaxTokens(200)\n .setBatchPromptCol("batchPrompt")\n .setErrorCol("error")\n .setOutputCol("completions")\n)\n')),(0,r.kt)("p",null,"In the call to transform, a request will be made per row. Since there are multiple prompts in a single row, each request is sent with all prompts in that row. The results contain a row for each row in the request."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},"completed_batch_df = batch_completion.transform(batch_df).cache()\ndisplay(completed_batch_df)\n")),(0,r.kt)("h3",{id:"using-an-automatic-minibatcher"},"Using an automatic minibatcher"),(0,r.kt)("p",null,"If your data is in column format, you can transpose it to row format using SynapseML's ",(0,r.kt)("inlineCode",{parentName:"p"},"FixedMiniBatcherTransformer"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.types import StringType\nfrom synapse.ml.stages import FixedMiniBatchTransformer\nfrom synapse.ml.core.spark import FluentAPI\n\ncompleted_autobatch_df = (\n df.coalesce(\n 1\n ) # Force a single partition so that our little 4-row dataframe makes a batch of size 4, you can remove this step for large datasets\n .mlTransform(FixedMiniBatchTransformer(batchSize=4))\n .withColumnRenamed("prompt", "batchPrompt")\n .mlTransform(batch_completion)\n)\n\ndisplay(completed_autobatch_df)\n')),(0,r.kt)("h3",{id:"prompt-engineering-for-translation"},"Prompt engineering for translation"),(0,r.kt)("p",null,"The Azure OpenAI service can solve many different natural language tasks through ",(0,r.kt)("a",{parentName:"p",href:"https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/completions"},"prompt engineering"),". Here, we show an example of prompting for language translation:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'translate_df = spark.createDataFrame(\n [\n ("Japanese: Ookina hako \\nEnglish: Big box \\nJapanese: Midori tako\\nEnglish:",),\n (\n "French: Quel heure et il au Montreal? \\nEnglish: What time is it in Montreal? \\nFrench: Ou est le poulet? \\nEnglish:",\n ),\n ]\n).toDF("prompt")\n\ndisplay(completion.transform(translate_df))\n')),(0,r.kt)("h3",{id:"prompt-for-question-answering"},"Prompt for question answering"),(0,r.kt)("p",null,"Here, we prompt GPT-3 for general-knowledge question answering:"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'qa_df = spark.createDataFrame(\n [\n (\n "Q: Where is the Grand Canyon?\\nA: The Grand Canyon is in Arizona.\\n\\nQ: What is the weight of the Burj Khalifa in kilograms?\\nA:",\n )\n ]\n).toDF("prompt")\n\ndisplay(completion.transform(qa_df))\n')))}c.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c0391845.8a1f3fd4.js b/assets/js/c0391845.8a1f3fd4.js deleted file mode 100644 index 50d2db5db5..0000000000 --- a/assets/js/c0391845.8a1f3fd4.js +++ /dev/null @@ -1 +0,0 @@ -"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[23234],{3905:(e,t,n)=>{n.d(t,{Zo:()=>l,kt:()=>m});var a=n(67294);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function s(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var p=a.createContext({}),c=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):s(s({},t),e)),n},l=function(e){var t=c(e.components);return a.createElement(p.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,o=e.mdxType,r=e.originalType,p=e.parentName,l=i(e,["components","mdxType","originalType","parentName"]),u=c(n),m=o,h=u["".concat(p,".").concat(m)]||u[m]||d[m]||r;return n?a.createElement(h,s(s({ref:t},l),{},{components:n})):a.createElement(h,s({ref:t},l))}));function m(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var r=n.length,s=new Array(r);s[0]=u;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i.mdxType="string"==typeof e?e:o,s[1]=i;for(var c=2;c{n.r(t),n.d(t,{assets:()=>p,contentTitle:()=>s,default:()=>d,frontMatter:()=>r,metadata:()=>i,toc:()=>c});var a=n(83117),o=(n(67294),n(3905));const r={title:"Quickstart - Document Question and Answering with PDFs",hide_title:!0,status:"stable"},s="A Guide to Q&A on PDF Documents",i={unversionedId:"Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs",id:"Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs",title:"Quickstart - Document Question and Answering with PDFs",description:"Introduction",source:"@site/docs/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs.md",sourceDirName:"Explore Algorithms/AI Services",slug:"/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs",draft:!1,tags:[],version:"current",frontMatter:{title:"Quickstart - Document Question and Answering with PDFs",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Quickstart - Create Audiobooks",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Create Audiobooks"},next:{title:"Quickstart - Flooding Risk",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Flooding Risk"}},p={},c=[{value:"Introduction",id:"introduction",level:2},{value:"Step 1: Provide the keys for Azure AI Services and Azure OpenAI to authenticate the applications.",id:"step-1-provide-the-keys-for-azure-ai-services-and-azure-openai-to-authenticate-the-applications",level:3},{value:"Step 2: Load the PDF documents into a Spark DataFrame.",id:"step-2-load-the-pdf-documents-into-a-spark-dataframe",level:3},{value:"Display the raw data from the PDF documents",id:"display-the-raw-data-from-the-pdf-documents",level:5},{value:"Step 3: Read the documents using Azure AI Document Intelligence.",id:"step-3-read-the-documents-using-azure-ai-document-intelligence",level:3},{value:"Step 4: Split the documents into chunks.",id:"step-4-split-the-documents-into-chunks",level:3},{value:"Step 5: Generate Embeddings.",id:"step-5-generate-embeddings",level:3},{value:"Step 6: Store the embeddings in Azure Cognitive Search Vector Store.",id:"step-6-store-the-embeddings-in-azure-cognitive-search-vector-store",level:3},{value:"Step 7: Ask a Question.",id:"step-7-ask-a-question",level:3},{value:"Step 8: Respond to a User\u2019s Question.",id:"step-8-respond-to-a-users-question",level:3}],l={toc:c};function d(e){let{components:t,...n}=e;return(0,o.kt)("wrapper",(0,a.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,o.kt)("h1",{id:"a-guide-to-qa-on-pdf-documents"},"A Guide to Q&A on PDF Documents"),(0,o.kt)("h2",{id:"introduction"},"Introduction"),(0,o.kt)("p",null,"In this notebook, we'll demonstrate how to develop a context-aware question answering framework for any form of a document using ",(0,o.kt)("a",{parentName:"p",href:"https://azure.microsoft.com/products/ai-services/openai-service"},"OpenAI models"),", ",(0,o.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/"},"SynapseML")," and ",(0,o.kt)("a",{parentName:"p",href:"https://azure.microsoft.com/products/ai-services/"},"Azure AI Services"),". In this notebook, we assume that PDF documents are the source of data, however, the same framework can be easiy extended to other document formats too. "),(0,o.kt)("p",null,"We\u2019ll cover the following key steps:"),(0,o.kt)("ol",null,(0,o.kt)("li",{parentName:"ol"},"Preprocessing PDF Documents: Learn how to load the PDF documents into a Spark DataFrame, read the documents using the ",(0,o.kt)("a",{parentName:"li",href:"https://azure.microsoft.com/products/ai-services/ai-document-intelligence"},"Azure AI Document Intelligence")," in Azure AI Services, and use SynapseML to split the documents into chunks."),(0,o.kt)("li",{parentName:"ol"},"Embedding Generation and Storage: Learn how to generate embeddings for the chunks using SynapseML and ",(0,o.kt)("a",{parentName:"li",href:"https://azure.microsoft.com/products/ai-services/openai-service"},"Azure OpenAI Services"),", store the embeddings in a vector store using ",(0,o.kt)("a",{parentName:"li",href:"https://azure.microsoft.com/products/search"},"Azure Cognitive Search"),", and search the vector store to answer the user\u2019s question."),(0,o.kt)("li",{parentName:"ol"},"Question Answering Pipeline: Learn how to retrieve relevant document based on the user\u2019s question and provide the answer using ",(0,o.kt)("a",{parentName:"li",href:"https://python.langchain.com/en/latest/index.html#"},"Langchain"),".")),(0,o.kt)("p",null,"We start by installing the necessary python libraries."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},"%pip install openai==0.28.1 langchain==0.0.331\n")),(0,o.kt)("h3",{id:"step-1-provide-the-keys-for-azure-ai-services-and-azure-openai-to-authenticate-the-applications"},"Step 1: Provide the keys for Azure AI Services and Azure OpenAI to authenticate the applications."),(0,o.kt)("p",null,"To authenticate Azure AI Services and Azure OpenAI applications, you need to provide the respective API keys. Here is an example of how you can provide the keys in Python code. ",(0,o.kt)("inlineCode",{parentName:"p"},"find_secret()")," function uses Azure Keyvault to get the API keys, however you can directly paste your own keys there."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql import SparkSession\nfrom synapse.ml.core.platform import find_secret\n\nai_services_key = find_secret(\n secret_name="ai-services-api-key", keyvault="mmlspark-build-keys"\n)\nai_services_location = "eastus"\n\n# Fill in the following lines with your Azure service information\naoai_service_name = "synapseml-openai"\naoai_endpoint = f"https://{aoai_service_name}.openai.azure.com/"\naoai_key = find_secret(secret_name="openai-api-key", keyvault="mmlspark-build-keys")\naoai_deployment_name_embeddings = "text-embedding-ada-002"\naoai_deployment_name_query = "text-davinci-003"\naoai_model_name_query = "text-davinci-003"\n\n# Azure Cognitive Search\ncogsearch_name = "mmlspark-azure-search"\ncogsearch_index_name = "examplevectorindex"\ncogsearch_api_key = find_secret(\n secret_name="azure-search-key", keyvault="mmlspark-build-keys"\n)\n')),(0,o.kt)("h3",{id:"step-2-load-the-pdf-documents-into-a-spark-dataframe"},"Step 2: Load the PDF documents into a Spark DataFrame."),(0,o.kt)("p",null,"For this tutorial, we will be using NASA's ",(0,o.kt)("a",{parentName:"p",href:"https://www.nasa.gov/sites/default/files/atoms/files/earth_book_2019_tagged.pdf"},"Earth")," and ",(0,o.kt)("a",{parentName:"p",href:"https://www.nasa.gov/sites/default/files/atoms/files/earth_at_night_508.pdf"},"Earth at Night")," e-books. To load PDF documents into a Spark DataFrame, you can use the ",(0,o.kt)("inlineCode",{parentName:"p"},'spark.read.format("binaryFile")')," method provided by Apache Spark."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import udf\nfrom pyspark.sql.types import StringType\n\ndocument_path = "wasbs://public@synapseaisolutionsa.blob.core.windows.net/NASAEarth" # path to your document\ndf = spark.read.format("binaryFile").load(document_path).limit(10).cache()\n')),(0,o.kt)("p",null,"This code will read the PDF documents and create a Spark DataFrame named df with the contents of the PDFs. The DataFrame will have a schema that represents the structure of the PDF documents, including their textual content."),(0,o.kt)("p",null,"Let's take a glimpse at the contents of the e-books we are working with. Below are some screenshots that showcase the essence of the books; as you can see they contain information about the Earth."),(0,o.kt)("img",{src:"https://mmlspark.blob.core.windows.net/graphics/notebooks/NASAearthbook_screenshot.png",width:"500"}),(0,o.kt)("img",{src:"https://mmlspark.blob.core.windows.net/graphics/notebooks/NASAearthatnight_screenshot.png",width:"460"}),(0,o.kt)("h5",{id:"display-the-raw-data-from-the-pdf-documents"},"Display the raw data from the PDF documents"),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'# Show the dataframe without the content\ndisplay(df.drop("content"))\n')),(0,o.kt)("h3",{id:"step-3-read-the-documents-using-azure-ai-document-intelligence"},"Step 3: Read the documents using Azure AI Document Intelligence."),(0,o.kt)("p",null,"We utilize ",(0,o.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/"},"SynapseML"),", an ecosystem of tools designed to enhance the distributed computing framework ",(0,o.kt)("a",{parentName:"p",href:"https://github.com/apache/spark"},"Apache Spark"),". SynapseML introduces advanced networking capabilities to the Spark ecosystem and offers user-friendly SparkML transformers for various ",(0,o.kt)("a",{parentName:"p",href:"https://azure.microsoft.com/products/ai-services"},"Azure AI Services"),"."),(0,o.kt)("p",null,'Additionally, we employ AnalyzeDocument from Azure AI Services to extract the complete document content and present it in the designated columns called "output_content" and "paragraph."'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.form import AnalyzeDocument\nfrom pyspark.sql.functions import col\n\nanalyze_document = (\n AnalyzeDocument()\n .setPrebuiltModelId("prebuilt-layout")\n .setSubscriptionKey(ai_services_key)\n .setLocation(ai_services_location)\n .setImageBytesCol("content")\n .setOutputCol("result")\n .setPages(\n "1-15"\n ) # Here we are reading the first 15 pages of the documents for demo purposes\n)\n\nanalyzed_df = (\n analyze_document.transform(df)\n .withColumn("output_content", col("result.analyzeResult.content"))\n .withColumn("paragraphs", col("result.analyzeResult.paragraphs"))\n).cache()\n')),(0,o.kt)("p",null,"We can observe the analayzed Spark DataFrame named ",(0,o.kt)("inlineCode",{parentName:"p"},"analyzed_df"),' using the following code. Note that we drop the "content" column as it is not needed anymore.'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'analyzed_df = analyzed_df.drop("content")\ndisplay(analyzed_df)\n')),(0,o.kt)("h3",{id:"step-4-split-the-documents-into-chunks"},"Step 4: Split the documents into chunks."),(0,o.kt)("p",null,"After analyzing the document, we leverage SynapseML\u2019s PageSplitter to divide the documents into smaller sections, which are subsequently stored in the \u201cchunks\u201d column. This allows for more granular representation and processing of the document content."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.featurize.text import PageSplitter\n\nps = (\n PageSplitter()\n .setInputCol("output_content")\n .setMaximumPageLength(4000)\n .setMinimumPageLength(3000)\n .setOutputCol("chunks")\n)\n\nsplitted_df = ps.transform(analyzed_df)\ndisplay(splitted_df)\n')),(0,o.kt)("p",null,"Note that the chunks for each document are presented in a single row inside an array. In order to embed all the chunks in the following cells, we need to have each chunk in a separate row. To accomplish that, we first explode these arrays so there is only one chunk in each row, then filter the Spark DataFrame in order to only keep the path to the document and the chunk in a single row."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'# Each column contains many chunks for the same document as a vector.\n# Explode will distribute and replicate the content of a vecor across multple rows\nfrom pyspark.sql.functions import explode, col\n\nexploded_df = splitted_df.select("path", explode(col("chunks")).alias("chunk")).select(\n "path", "chunk"\n)\ndisplay(exploded_df)\n')),(0,o.kt)("h3",{id:"step-5-generate-embeddings"},"Step 5: Generate Embeddings."),(0,o.kt)("p",null,"To produce embeddings for each chunk, we utilize both SynapseML and Azure OpenAI Service. By integrating the Azure OpenAI service with SynapseML, we can leverage the power of the Apache Spark distributed computing framework to process numerous prompts using the OpenAI service. This integration enables the SynapseML embedding client to generate embeddings in a distributed manner, enabling efficient processing of large volumes of data. If you're interested in applying large language models at a distributed scale using Azure OpenAI and Azure Synapse Analytics, you can refer to ",(0,o.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/OpenAI/"},"this approach"),". For more detailed information on generating embeddings with Azure OpenAI, you can look ",(0,o.kt)("a",{parentName:"p",href:"https://learn.microsoft.com/azure/cognitive-services/openai/how-to/embeddings?tabs=console"},"here"),"."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIEmbedding\n\nembedding = (\n OpenAIEmbedding()\n .setSubscriptionKey(aoai_key)\n .setDeploymentName(aoai_deployment_name_embeddings)\n .setCustomServiceName(aoai_service_name)\n .setTextCol("chunk")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\ndf_embeddings = embedding.transform(exploded_df)\n\ndisplay(df_embeddings)\n')),(0,o.kt)("h3",{id:"step-6-store-the-embeddings-in-azure-cognitive-search-vector-store"},"Step 6: Store the embeddings in Azure Cognitive Search Vector Store."),(0,o.kt)("p",null,(0,o.kt)("a",{parentName:"p",href:"https://learn.microsoft.com/azure/search/search-what-is-azure-search"},"Azure Cognitive Search")," offers a user-friendly interface for creating a vector database, as well as storing and retrieving data using vector search. If you're interested in learning more about vector search, you can look ",(0,o.kt)("a",{parentName:"p",href:"https://github.com/Azure/cognitive-search-vector-pr/tree/main"},"here"),"."),(0,o.kt)("p",null,"Storing data in the AzureCogSearch vector database involves two main steps:"),(0,o.kt)("p",null,"Creating the Index: The first step is to establish the index or schema of the vector database. This entails defining the structure and properties of the data that will be stored and indexed in the vector database."),(0,o.kt)("p",null,"Adding Chunked Documents and Embeddings: The second step involves adding the chunked documents, along with their corresponding embeddings, to the vector datastore. This allows for efficient storage and retrieval of the data using vector search capabilities."),(0,o.kt)("p",null,"By following these steps, you can effectively store your chunked documents and their associated embeddings in the AzureCogSearch vector database, enabling seamless retrieval of relevant information through vector search functionality."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import monotonically_increasing_id\nfrom pyspark.sql.functions import lit\n\ndf_embeddings = (\n df_embeddings.drop("error")\n .withColumn(\n "idx", monotonically_increasing_id().cast("string")\n ) # create index ID for ACS\n .withColumn("searchAction", lit("upload"))\n)\n')),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services import writeToAzureSearch\nimport json\n\ndf_embeddings.writeToAzureSearch(\n subscriptionKey=cogsearch_api_key,\n actionCol="searchAction",\n serviceName=cogsearch_name,\n indexName=cogsearch_index_name,\n keyCol="idx",\n vectorCols=json.dumps([{"name": "embeddings", "dimension": 1536}]),\n)\n')),(0,o.kt)("h3",{id:"step-7-ask-a-question"},"Step 7: Ask a Question."),(0,o.kt)("p",null,"After processing the document, we can proceed to pose a question. We will use ",(0,o.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/OpenAI/Quickstart%20-%20OpenAI%20Embedding/"},"SynapseML")," to convert the user's question into an embedding and then utilize cosine similarity to retrieve the top K document chunks that closely match the user's question. It's worth mentioning that alternative similarity metrics can also be employed."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'user_question = "What did the astronaut Edgar Mitchell call Earth?"\nretrieve_k = 2 # Retrieve the top 2 documents from vector database\n')),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'import requests\n\n# Ask a question and convert to embeddings\n\n\ndef gen_question_embedding(user_question):\n # Convert question to embedding using synapseML\n from synapse.ml.services.openai import OpenAIEmbedding\n\n df_ques = spark.createDataFrame([(user_question, 1)], ["questions", "dummy"])\n embedding = (\n OpenAIEmbedding()\n .setSubscriptionKey(aoai_key)\n .setDeploymentName(aoai_deployment_name_embeddings)\n .setCustomServiceName(aoai_service_name)\n .setTextCol("questions")\n .setErrorCol("errorQ")\n .setOutputCol("embeddings")\n )\n df_ques_embeddings = embedding.transform(df_ques)\n row = df_ques_embeddings.collect()[0]\n question_embedding = row.embeddings.tolist()\n return question_embedding\n\n\ndef retrieve_k_chunk(k, question_embedding):\n # Retrieve the top K entries\n url = f"https://{cogsearch_name}.search.windows.net/indexes/{cogsearch_index_name}/docs/search?api-version=2023-07-01-Preview"\n\n payload = json.dumps(\n {"vector": {"value": question_embedding, "fields": "embeddings", "k": k}}\n )\n headers = {\n "Content-Type": "application/json",\n "api-key": cogsearch_api_key,\n }\n\n response = requests.request("POST", url, headers=headers, data=payload)\n output = json.loads(response.text)\n print(response.status_code)\n return output\n\n\n# Generate embeddings for the question and retrieve the top k document chunks\nquestion_embedding = gen_question_embedding(user_question)\noutput = retrieve_k_chunk(retrieve_k, question_embedding)\n')),(0,o.kt)("h3",{id:"step-8-respond-to-a-users-question"},"Step 8: Respond to a User\u2019s Question."),(0,o.kt)("p",null,"To provide a response to the user's question, we will utilize the ",(0,o.kt)("a",{parentName:"p",href:"https://python.langchain.com/en/latest/index.html"},"LangChain")," framework. With the LangChain framework we will augment the retrieved documents with respect to the user's question. Following this, we can request a response to the user's question from our framework."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'# Import necenssary libraries and setting up OpenAI\nfrom langchain.llms import AzureOpenAI\nfrom langchain import PromptTemplate\nfrom langchain.chains import LLMChain\nimport openai\n\nopenai.api_type = "azure"\nopenai.api_base = aoai_endpoint\nopenai.api_version = "2022-12-01"\nopenai.api_key = aoai_key\n')),(0,o.kt)("p",null,'We can now wrap up the Q&A journey by asking a question and checking the answer. You will see that Edgar Mitchell called Earth "a sparkling blue and white jewel"!'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'# Define a Question Answering chain function using LangChain\ndef qa_chain_func():\n\n # Define llm model\n llm = AzureOpenAI(\n deployment_name=aoai_deployment_name_query,\n model_name=aoai_model_name_query,\n openai_api_key=aoai_key,\n openai_api_version="2022-12-01",\n )\n\n # Write a preprompt with context and query as variables\n template = """\n context :{context}\n Answer the question based on the context above. If the\n information to answer the question is not present in the given context then reply "I don\'t know".\n Question: {query}\n Answer: """\n\n # Define a prompt template\n prompt_template = PromptTemplate(\n input_variables=["context", "query"], template=template\n )\n # Define a chain\n qa_chain = LLMChain(llm=llm, prompt=prompt_template)\n return qa_chain\n\n\n# Concatenate the content of retrieved documents\ncontext = [i["chunk"] for i in output["value"]]\n\n# Make a Quesion Answer chain function and pass\nqa_chain = qa_chain_func()\nanswer = qa_chain.run({"context": context, "query": user_question})\n\nprint(answer)\n')))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/c0391845.f0bddc20.js b/assets/js/c0391845.f0bddc20.js new file mode 100644 index 0000000000..67dc9a0ba6 --- /dev/null +++ b/assets/js/c0391845.f0bddc20.js @@ -0,0 +1 @@ +"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[23234],{3905:(e,t,n)=>{n.d(t,{Zo:()=>l,kt:()=>m});var a=n(67294);function o(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function r(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);t&&(a=a.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,a)}return n}function s(e){for(var t=1;t=0||(o[n]=e[n]);return o}(e,t);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(o[n]=e[n])}return o}var p=a.createContext({}),c=function(e){var t=a.useContext(p),n=t;return e&&(n="function"==typeof e?e(t):s(s({},t),e)),n},l=function(e){var t=c(e.components);return a.createElement(p.Provider,{value:t},e.children)},d={inlineCode:"code",wrapper:function(e){var t=e.children;return a.createElement(a.Fragment,{},t)}},u=a.forwardRef((function(e,t){var n=e.components,o=e.mdxType,r=e.originalType,p=e.parentName,l=i(e,["components","mdxType","originalType","parentName"]),u=c(n),m=o,h=u["".concat(p,".").concat(m)]||u[m]||d[m]||r;return n?a.createElement(h,s(s({ref:t},l),{},{components:n})):a.createElement(h,s({ref:t},l))}));function m(e,t){var n=arguments,o=t&&t.mdxType;if("string"==typeof e||o){var r=n.length,s=new Array(r);s[0]=u;var i={};for(var p in t)hasOwnProperty.call(t,p)&&(i[p]=t[p]);i.originalType=e,i.mdxType="string"==typeof e?e:o,s[1]=i;for(var c=2;c{n.r(t),n.d(t,{assets:()=>p,contentTitle:()=>s,default:()=>d,frontMatter:()=>r,metadata:()=>i,toc:()=>c});var a=n(83117),o=(n(67294),n(3905));const r={title:"Quickstart - Document Question and Answering with PDFs",hide_title:!0,status:"stable"},s="A Guide to Q&A on PDF Documents",i={unversionedId:"Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs",id:"Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs",title:"Quickstart - Document Question and Answering with PDFs",description:"Introduction",source:"@site/docs/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs.md",sourceDirName:"Explore Algorithms/AI Services",slug:"/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Document Question and Answering with PDFs",draft:!1,tags:[],version:"current",frontMatter:{title:"Quickstart - Document Question and Answering with PDFs",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Quickstart - Create Audiobooks",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Create Audiobooks"},next:{title:"Quickstart - Flooding Risk",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Flooding Risk"}},p={},c=[{value:"Introduction",id:"introduction",level:2},{value:"Step 1: Provide the keys for Azure AI Services and Azure OpenAI to authenticate the applications.",id:"step-1-provide-the-keys-for-azure-ai-services-and-azure-openai-to-authenticate-the-applications",level:3},{value:"Step 2: Load the PDF documents into a Spark DataFrame.",id:"step-2-load-the-pdf-documents-into-a-spark-dataframe",level:3},{value:"Display the raw data from the PDF documents",id:"display-the-raw-data-from-the-pdf-documents",level:5},{value:"Step 3: Read the documents using Azure AI Document Intelligence.",id:"step-3-read-the-documents-using-azure-ai-document-intelligence",level:3},{value:"Step 4: Split the documents into chunks.",id:"step-4-split-the-documents-into-chunks",level:3},{value:"Step 5: Generate Embeddings.",id:"step-5-generate-embeddings",level:3},{value:"Step 6: Store the embeddings in Azure Cognitive Search Vector Store.",id:"step-6-store-the-embeddings-in-azure-cognitive-search-vector-store",level:3},{value:"Step 7: Ask a Question.",id:"step-7-ask-a-question",level:3},{value:"Step 8: Respond to a User\u2019s Question.",id:"step-8-respond-to-a-users-question",level:3}],l={toc:c};function d(e){let{components:t,...n}=e;return(0,o.kt)("wrapper",(0,a.Z)({},l,n,{components:t,mdxType:"MDXLayout"}),(0,o.kt)("h1",{id:"a-guide-to-qa-on-pdf-documents"},"A Guide to Q&A on PDF Documents"),(0,o.kt)("h2",{id:"introduction"},"Introduction"),(0,o.kt)("p",null,"In this notebook, we'll demonstrate how to develop a context-aware question answering framework for any form of a document using ",(0,o.kt)("a",{parentName:"p",href:"https://azure.microsoft.com/products/ai-services/openai-service"},"OpenAI models"),", ",(0,o.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/"},"SynapseML")," and ",(0,o.kt)("a",{parentName:"p",href:"https://azure.microsoft.com/products/ai-services/"},"Azure AI Services"),". In this notebook, we assume that PDF documents are the source of data, however, the same framework can be easiy extended to other document formats too. "),(0,o.kt)("p",null,"We\u2019ll cover the following key steps:"),(0,o.kt)("ol",null,(0,o.kt)("li",{parentName:"ol"},"Preprocessing PDF Documents: Learn how to load the PDF documents into a Spark DataFrame, read the documents using the ",(0,o.kt)("a",{parentName:"li",href:"https://azure.microsoft.com/products/ai-services/ai-document-intelligence"},"Azure AI Document Intelligence")," in Azure AI Services, and use SynapseML to split the documents into chunks."),(0,o.kt)("li",{parentName:"ol"},"Embedding Generation and Storage: Learn how to generate embeddings for the chunks using SynapseML and ",(0,o.kt)("a",{parentName:"li",href:"https://azure.microsoft.com/products/ai-services/openai-service"},"Azure OpenAI Services"),", store the embeddings in a vector store using ",(0,o.kt)("a",{parentName:"li",href:"https://azure.microsoft.com/products/search"},"Azure Cognitive Search"),", and search the vector store to answer the user\u2019s question."),(0,o.kt)("li",{parentName:"ol"},"Question Answering Pipeline: Learn how to retrieve relevant document based on the user\u2019s question and provide the answer using ",(0,o.kt)("a",{parentName:"li",href:"https://python.langchain.com/en/latest/index.html#"},"Langchain"),".")),(0,o.kt)("p",null,"We start by installing the necessary python libraries."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},"%pip install openai==0.28.1 langchain==0.0.331\n")),(0,o.kt)("h3",{id:"step-1-provide-the-keys-for-azure-ai-services-and-azure-openai-to-authenticate-the-applications"},"Step 1: Provide the keys for Azure AI Services and Azure OpenAI to authenticate the applications."),(0,o.kt)("p",null,"To authenticate Azure AI Services and Azure OpenAI applications, you need to provide the respective API keys. Here is an example of how you can provide the keys in Python code. ",(0,o.kt)("inlineCode",{parentName:"p"},"find_secret()")," function uses Azure Keyvault to get the API keys, however you can directly paste your own keys there."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql import SparkSession\nfrom synapse.ml.core.platform import find_secret\n\nai_services_key = find_secret(\n secret_name="ai-services-api-key", keyvault="mmlspark-build-keys"\n)\nai_services_location = "eastus"\n\n# Fill in the following lines with your Azure service information\naoai_service_name = "synapseml-openai"\naoai_endpoint = f"https://{aoai_service_name}.openai.azure.com/"\naoai_key = find_secret(secret_name="openai-api-key", keyvault="mmlspark-build-keys")\naoai_deployment_name_embeddings = "text-embedding-ada-002"\naoai_deployment_name_query = "text-davinci-003"\naoai_model_name_query = "text-davinci-003"\n\n# Azure Cognitive Search\ncogsearch_name = "mmlspark-azure-search"\ncogsearch_index_name = "examplevectorindex"\ncogsearch_api_key = find_secret(\n secret_name="azure-search-key", keyvault="mmlspark-build-keys"\n)\n')),(0,o.kt)("h3",{id:"step-2-load-the-pdf-documents-into-a-spark-dataframe"},"Step 2: Load the PDF documents into a Spark DataFrame."),(0,o.kt)("p",null,"For this tutorial, we will be using NASA's ",(0,o.kt)("a",{parentName:"p",href:"https://www.nasa.gov/sites/default/files/atoms/files/earth_book_2019_tagged.pdf"},"Earth")," and ",(0,o.kt)("a",{parentName:"p",href:"https://www.nasa.gov/sites/default/files/atoms/files/earth_at_night_508.pdf"},"Earth at Night")," e-books. To load PDF documents into a Spark DataFrame, you can use the ",(0,o.kt)("inlineCode",{parentName:"p"},'spark.read.format("binaryFile")')," method provided by Apache Spark."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import udf\nfrom pyspark.sql.types import StringType\n\ndocument_path = "wasbs://publicwasb@mmlspark.blob.core.windows.net/NASAEarth" # path to your document\ndf = spark.read.format("binaryFile").load(document_path).limit(10).cache()\n')),(0,o.kt)("p",null,"This code will read the PDF documents and create a Spark DataFrame named df with the contents of the PDFs. The DataFrame will have a schema that represents the structure of the PDF documents, including their textual content."),(0,o.kt)("p",null,"Let's take a glimpse at the contents of the e-books we are working with. Below are some screenshots that showcase the essence of the books; as you can see they contain information about the Earth."),(0,o.kt)("img",{src:"https://mmlspark.blob.core.windows.net/graphics/notebooks/NASAearthbook_screenshot.png",width:"500"}),(0,o.kt)("img",{src:"https://mmlspark.blob.core.windows.net/graphics/notebooks/NASAearthatnight_screenshot.png",width:"460"}),(0,o.kt)("h5",{id:"display-the-raw-data-from-the-pdf-documents"},"Display the raw data from the PDF documents"),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'# Show the dataframe without the content\ndisplay(df.drop("content"))\n')),(0,o.kt)("h3",{id:"step-3-read-the-documents-using-azure-ai-document-intelligence"},"Step 3: Read the documents using Azure AI Document Intelligence."),(0,o.kt)("p",null,"We utilize ",(0,o.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/"},"SynapseML"),", an ecosystem of tools designed to enhance the distributed computing framework ",(0,o.kt)("a",{parentName:"p",href:"https://github.com/apache/spark"},"Apache Spark"),". SynapseML introduces advanced networking capabilities to the Spark ecosystem and offers user-friendly SparkML transformers for various ",(0,o.kt)("a",{parentName:"p",href:"https://azure.microsoft.com/products/ai-services"},"Azure AI Services"),"."),(0,o.kt)("p",null,'Additionally, we employ AnalyzeDocument from Azure AI Services to extract the complete document content and present it in the designated columns called "output_content" and "paragraph."'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.form import AnalyzeDocument\nfrom pyspark.sql.functions import col\n\nanalyze_document = (\n AnalyzeDocument()\n .setPrebuiltModelId("prebuilt-layout")\n .setSubscriptionKey(ai_services_key)\n .setLocation(ai_services_location)\n .setImageBytesCol("content")\n .setOutputCol("result")\n .setPages(\n "1-15"\n ) # Here we are reading the first 15 pages of the documents for demo purposes\n)\n\nanalyzed_df = (\n analyze_document.transform(df)\n .withColumn("output_content", col("result.analyzeResult.content"))\n .withColumn("paragraphs", col("result.analyzeResult.paragraphs"))\n).cache()\n')),(0,o.kt)("p",null,"We can observe the analayzed Spark DataFrame named ",(0,o.kt)("inlineCode",{parentName:"p"},"analyzed_df"),' using the following code. Note that we drop the "content" column as it is not needed anymore.'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'analyzed_df = analyzed_df.drop("content")\ndisplay(analyzed_df)\n')),(0,o.kt)("h3",{id:"step-4-split-the-documents-into-chunks"},"Step 4: Split the documents into chunks."),(0,o.kt)("p",null,"After analyzing the document, we leverage SynapseML\u2019s PageSplitter to divide the documents into smaller sections, which are subsequently stored in the \u201cchunks\u201d column. This allows for more granular representation and processing of the document content."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.featurize.text import PageSplitter\n\nps = (\n PageSplitter()\n .setInputCol("output_content")\n .setMaximumPageLength(4000)\n .setMinimumPageLength(3000)\n .setOutputCol("chunks")\n)\n\nsplitted_df = ps.transform(analyzed_df)\ndisplay(splitted_df)\n')),(0,o.kt)("p",null,"Note that the chunks for each document are presented in a single row inside an array. In order to embed all the chunks in the following cells, we need to have each chunk in a separate row. To accomplish that, we first explode these arrays so there is only one chunk in each row, then filter the Spark DataFrame in order to only keep the path to the document and the chunk in a single row."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'# Each column contains many chunks for the same document as a vector.\n# Explode will distribute and replicate the content of a vecor across multple rows\nfrom pyspark.sql.functions import explode, col\n\nexploded_df = splitted_df.select("path", explode(col("chunks")).alias("chunk")).select(\n "path", "chunk"\n)\ndisplay(exploded_df)\n')),(0,o.kt)("h3",{id:"step-5-generate-embeddings"},"Step 5: Generate Embeddings."),(0,o.kt)("p",null,"To produce embeddings for each chunk, we utilize both SynapseML and Azure OpenAI Service. By integrating the Azure OpenAI service with SynapseML, we can leverage the power of the Apache Spark distributed computing framework to process numerous prompts using the OpenAI service. This integration enables the SynapseML embedding client to generate embeddings in a distributed manner, enabling efficient processing of large volumes of data. If you're interested in applying large language models at a distributed scale using Azure OpenAI and Azure Synapse Analytics, you can refer to ",(0,o.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/OpenAI/"},"this approach"),". For more detailed information on generating embeddings with Azure OpenAI, you can look ",(0,o.kt)("a",{parentName:"p",href:"https://learn.microsoft.com/azure/cognitive-services/openai/how-to/embeddings?tabs=console"},"here"),"."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIEmbedding\n\nembedding = (\n OpenAIEmbedding()\n .setSubscriptionKey(aoai_key)\n .setDeploymentName(aoai_deployment_name_embeddings)\n .setCustomServiceName(aoai_service_name)\n .setTextCol("chunk")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\ndf_embeddings = embedding.transform(exploded_df)\n\ndisplay(df_embeddings)\n')),(0,o.kt)("h3",{id:"step-6-store-the-embeddings-in-azure-cognitive-search-vector-store"},"Step 6: Store the embeddings in Azure Cognitive Search Vector Store."),(0,o.kt)("p",null,(0,o.kt)("a",{parentName:"p",href:"https://learn.microsoft.com/azure/search/search-what-is-azure-search"},"Azure Cognitive Search")," offers a user-friendly interface for creating a vector database, as well as storing and retrieving data using vector search. If you're interested in learning more about vector search, you can look ",(0,o.kt)("a",{parentName:"p",href:"https://github.com/Azure/cognitive-search-vector-pr/tree/main"},"here"),"."),(0,o.kt)("p",null,"Storing data in the AzureCogSearch vector database involves two main steps:"),(0,o.kt)("p",null,"Creating the Index: The first step is to establish the index or schema of the vector database. This entails defining the structure and properties of the data that will be stored and indexed in the vector database."),(0,o.kt)("p",null,"Adding Chunked Documents and Embeddings: The second step involves adding the chunked documents, along with their corresponding embeddings, to the vector datastore. This allows for efficient storage and retrieval of the data using vector search capabilities."),(0,o.kt)("p",null,"By following these steps, you can effectively store your chunked documents and their associated embeddings in the AzureCogSearch vector database, enabling seamless retrieval of relevant information through vector search functionality."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.sql.functions import monotonically_increasing_id\nfrom pyspark.sql.functions import lit\n\ndf_embeddings = (\n df_embeddings.drop("error")\n .withColumn(\n "idx", monotonically_increasing_id().cast("string")\n ) # create index ID for ACS\n .withColumn("searchAction", lit("upload"))\n)\n')),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services import writeToAzureSearch\nimport json\n\ndf_embeddings.writeToAzureSearch(\n subscriptionKey=cogsearch_api_key,\n actionCol="searchAction",\n serviceName=cogsearch_name,\n indexName=cogsearch_index_name,\n keyCol="idx",\n vectorCols=json.dumps([{"name": "embeddings", "dimension": 1536}]),\n)\n')),(0,o.kt)("h3",{id:"step-7-ask-a-question"},"Step 7: Ask a Question."),(0,o.kt)("p",null,"After processing the document, we can proceed to pose a question. We will use ",(0,o.kt)("a",{parentName:"p",href:"https://microsoft.github.io/SynapseML/docs/Explore%20Algorithms/OpenAI/Quickstart%20-%20OpenAI%20Embedding/"},"SynapseML")," to convert the user's question into an embedding and then utilize cosine similarity to retrieve the top K document chunks that closely match the user's question. It's worth mentioning that alternative similarity metrics can also be employed."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'user_question = "What did the astronaut Edgar Mitchell call Earth?"\nretrieve_k = 2 # Retrieve the top 2 documents from vector database\n')),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'import requests\n\n# Ask a question and convert to embeddings\n\n\ndef gen_question_embedding(user_question):\n # Convert question to embedding using synapseML\n from synapse.ml.services.openai import OpenAIEmbedding\n\n df_ques = spark.createDataFrame([(user_question, 1)], ["questions", "dummy"])\n embedding = (\n OpenAIEmbedding()\n .setSubscriptionKey(aoai_key)\n .setDeploymentName(aoai_deployment_name_embeddings)\n .setCustomServiceName(aoai_service_name)\n .setTextCol("questions")\n .setErrorCol("errorQ")\n .setOutputCol("embeddings")\n )\n df_ques_embeddings = embedding.transform(df_ques)\n row = df_ques_embeddings.collect()[0]\n question_embedding = row.embeddings.tolist()\n return question_embedding\n\n\ndef retrieve_k_chunk(k, question_embedding):\n # Retrieve the top K entries\n url = f"https://{cogsearch_name}.search.windows.net/indexes/{cogsearch_index_name}/docs/search?api-version=2023-07-01-Preview"\n\n payload = json.dumps(\n {"vector": {"value": question_embedding, "fields": "embeddings", "k": k}}\n )\n headers = {\n "Content-Type": "application/json",\n "api-key": cogsearch_api_key,\n }\n\n response = requests.request("POST", url, headers=headers, data=payload)\n output = json.loads(response.text)\n print(response.status_code)\n return output\n\n\n# Generate embeddings for the question and retrieve the top k document chunks\nquestion_embedding = gen_question_embedding(user_question)\noutput = retrieve_k_chunk(retrieve_k, question_embedding)\n')),(0,o.kt)("h3",{id:"step-8-respond-to-a-users-question"},"Step 8: Respond to a User\u2019s Question."),(0,o.kt)("p",null,"To provide a response to the user's question, we will utilize the ",(0,o.kt)("a",{parentName:"p",href:"https://python.langchain.com/en/latest/index.html"},"LangChain")," framework. With the LangChain framework we will augment the retrieved documents with respect to the user's question. Following this, we can request a response to the user's question from our framework."),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'# Import necenssary libraries and setting up OpenAI\nfrom langchain.llms import AzureOpenAI\nfrom langchain import PromptTemplate\nfrom langchain.chains import LLMChain\nimport openai\n\nopenai.api_type = "azure"\nopenai.api_base = aoai_endpoint\nopenai.api_version = "2022-12-01"\nopenai.api_key = aoai_key\n')),(0,o.kt)("p",null,'We can now wrap up the Q&A journey by asking a question and checking the answer. You will see that Edgar Mitchell called Earth "a sparkling blue and white jewel"!'),(0,o.kt)("pre",null,(0,o.kt)("code",{parentName:"pre",className:"language-python"},'# Define a Question Answering chain function using LangChain\ndef qa_chain_func():\n\n # Define llm model\n llm = AzureOpenAI(\n deployment_name=aoai_deployment_name_query,\n model_name=aoai_model_name_query,\n openai_api_key=aoai_key,\n openai_api_version="2022-12-01",\n )\n\n # Write a preprompt with context and query as variables\n template = """\n context :{context}\n Answer the question based on the context above. If the\n information to answer the question is not present in the given context then reply "I don\'t know".\n Question: {query}\n Answer: """\n\n # Define a prompt template\n prompt_template = PromptTemplate(\n input_variables=["context", "query"], template=template\n )\n # Define a chain\n qa_chain = LLMChain(llm=llm, prompt=prompt_template)\n return qa_chain\n\n\n# Concatenate the content of retrieved documents\ncontext = [i["chunk"] for i in output["value"]]\n\n# Make a Quesion Answer chain function and pass\nqa_chain = qa_chain_func()\nanswer = qa_chain.run({"context": context, "query": user_question})\n\nprint(answer)\n')))}d.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/dd3f9ada.e028f5db.js b/assets/js/dd3f9ada.7a974bf8.js similarity index 66% rename from assets/js/dd3f9ada.e028f5db.js rename to assets/js/dd3f9ada.7a974bf8.js index ef09bddd99..bd3359ae35 100644 --- a/assets/js/dd3f9ada.e028f5db.js +++ b/assets/js/dd3f9ada.7a974bf8.js @@ -1 +1 @@ -"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[17820],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>u});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),p=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},d=function(e){var t=p(e.components);return r.createElement(l.Provider,{value:t},e.children)},m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},c=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),c=p(n),u=a,g=c["".concat(l,".").concat(u)]||c[u]||m[u]||o;return n?r.createElement(g,i(i({ref:t},d),{},{components:n})):r.createElement(g,i({ref:t},d))}));function u(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=c;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s.mdxType="string"==typeof e?e:a,i[1]=s;for(var p=2;p{n.r(t),n.d(t,{assets:()=>l,contentTitle:()=>i,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>p});var r=n(83117),a=(n(67294),n(3905));const o={title:"Quickstart - OpenAI Embedding and GPU based KNN",hide_title:!0,status:"stable"},i="Embedding Text with Azure OpenAI and GPU based KNN",s={unversionedId:"Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN",id:"Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN",title:"Quickstart - OpenAI Embedding and GPU based KNN",description:'The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples we have integrated the Azure OpenAI service with the distributed machine learning library Spark Rapids ML. This integration makes it easy to use the Apache Spark distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models to generate embeddings for large datasets of text. This demo is based on "Quickstart - OpenAI Embedding" notebook with NVIDIA GPU accelerated KNN.',source:"@site/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN.md",sourceDirName:"Explore Algorithms/OpenAI",slug:"/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN",draft:!1,tags:[],version:"current",frontMatter:{title:"Quickstart - OpenAI Embedding and GPU based KNN",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Quickstart - OpenAI Embedding",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding"},next:{title:"Quickstart - Understand and Search Forms",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms"}},l={},p=[{value:"Step 1: Prerequisites",id:"step-1-prerequisites",level:2},{value:"Step 2: Import this guide as a notebook",id:"step-2-import-this-guide-as-a-notebook",level:2},{value:"Step 3: Fill in your service information",id:"step-3-fill-in-your-service-information",level:2},{value:"Step 4: Load Data",id:"step-4-load-data",level:2},{value:"Step 5: Generate Embeddings",id:"step-5-generate-embeddings",level:2},{value:"Step 6: Reduce Embedding dimensionality for Visualization",id:"step-6-reduce-embedding-dimensionality-for-visualization",level:2},{value:"Step 7: Plot the embeddings",id:"step-7-plot-the-embeddings",level:2},{value:"Step 8: Build the query against embeddings",id:"step-8-build-the-query-against-embeddings",level:2},{value:"Step 9: Fit KNN model",id:"step-9-fit-knn-model",level:2},{value:"Step 10: Retrieve query results",id:"step-10-retrieve-query-results",level:2}],d={toc:p};function m(e){let{components:t,...n}=e;return(0,a.kt)("wrapper",(0,r.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("h1",{id:"embedding-text-with-azure-openai-and-gpu-based-knn"},"Embedding Text with Azure OpenAI and GPU based KNN"),(0,a.kt)("p",null,"The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples we have integrated the Azure OpenAI service with the distributed machine learning library ",(0,a.kt)("a",{parentName:"p",href:"https://github.com/NVIDIA/spark-rapids-ml/"},"Spark Rapids ML"),". This integration makes it easy to use the ",(0,a.kt)("a",{parentName:"p",href:"https://spark.apache.org/"},"Apache Spark"),' distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models to generate embeddings for large datasets of text. This demo is based on "Quickstart - OpenAI Embedding" notebook with NVIDIA GPU accelerated KNN.'),(0,a.kt)("p",null,(0,a.kt)("strong",{parentName:"p"},"Note"),": Running the notebook with the demo dataset (Step 4) will generate the same results as CPU based \u201cQuickstart - OpenAI Embedding\u201d notebook. To see GPU acceleration you need to run query against bigger embeddings.\nFor example, running 100K rows dataset will give 6x acceleration and consume less than 10x memory on 2 nodes NVIDIA T4 cluster compare to AMD Epic (Rome) 2 nodes CPU cluster."),(0,a.kt)("h2",{id:"step-1-prerequisites"},"Step 1: Prerequisites"),(0,a.kt)("p",null,"The key prerequisites for this quickstart include a working Azure OpenAI resource, and an Apache Spark cluster with SynapseML installed. We suggest creating a Synapse workspace, but an Azure Databricks, HDInsight, or Spark on Kubernetes, or even a python environment with the ",(0,a.kt)("inlineCode",{parentName:"p"},"pyspark")," package will work. "),(0,a.kt)("ol",null,(0,a.kt)("li",{parentName:"ol"},"An Azure OpenAI resource \u2013 request access ",(0,a.kt)("a",{parentName:"li",href:"https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu"},"here")," before ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource"},"creating a resource")),(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-create-workspace"},"Create a Synapse workspace")),(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-analyze-spark#create-a-serverless-apache-spark-pool"},"Create a serverless Apache Spark pool"))),(0,a.kt)("h2",{id:"step-2-import-this-guide-as-a-notebook"},"Step 2: Import this guide as a notebook"),(0,a.kt)("p",null,"The next step is to add this code into your Spark cluster. You can either create a notebook in your Spark platform and copy the code into this notebook to run the demo. Or download the notebook and import it into Synapse Analytics"),(0,a.kt)("ol",null,(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://github.com/microsoft/SynapseML/blob/master/notebooks/features/cognitive_services/CognitiveServices%20-%20OpenAI%20Embedding.ipynb"},"Download this demo as a notebook")," (click Raw, then save the file)"),(0,a.kt)("li",{parentName:"ol"},"Import the notebook ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-development-using-notebooks#create-a-notebook"},"into the Synapse Workspace")," or if using Databricks ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/databricks/notebooks/notebooks-manage#create-a-notebook"},"into the Databricks Workspace")),(0,a.kt)("li",{parentName:"ol"},"Install SynapseML on your cluster. Please see the installation instructions for Synapse at the bottom of ",(0,a.kt)("a",{parentName:"li",href:"https://microsoft.github.io/SynapseML/"},"the SynapseML website"),". Note that this requires pasting an additional cell at the top of the notebook you just imported"),(0,a.kt)("li",{parentName:"ol"},"To run the notebook on Databricks add related init script (/tools/init_scripts/init-rapidsml-cuda-11.8.sh)"),(0,a.kt)("li",{parentName:"ol"},"Connect your notebook to a cluster and follow along, editing and rnnung the cells below.")),(0,a.kt)("h2",{id:"step-3-fill-in-your-service-information"},"Step 3: Fill in your service information"),(0,a.kt)("p",null,"Next, please edit the cell in the notebook to point to your service. In particular set the ",(0,a.kt)("inlineCode",{parentName:"p"},"service_name"),", ",(0,a.kt)("inlineCode",{parentName:"p"},"deployment_name"),", ",(0,a.kt)("inlineCode",{parentName:"p"},"location"),", and ",(0,a.kt)("inlineCode",{parentName:"p"},"key")," variables to match those for your OpenAI service"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.core.platform import find_secret\n\n# Fill in the following lines with your service information\n# Learn more about selecting which embedding model to choose: https://openai.com/blog/new-and-improved-embedding-model\nservice_name = "synapseml-openai"\ndeployment_name_embeddings = "text-embedding-ada-002"\n\nkey = find_secret(\n secret_name="openai-api-key", keyvault="mmlspark-build-keys"\n) # please replace this with your key as a string\n\nassert key is not None and service_name is not None\n')),(0,a.kt)("h2",{id:"step-4-load-data"},"Step 4: Load Data"),(0,a.kt)("p",null,"In this demo we will explore a dataset of fine food reviews"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import pyspark.sql.functions as F\n\ndf = (\n spark.read.options(inferSchema="True", delimiter=",", header=True)\n .csv("wasbs://publicwasb@mmlspark.blob.core.windows.net/fine_food_reviews_1k.csv")\n .repartition(5)\n)\n\ndf = df.withColumn(\n "combined",\n F.format_string("Title: %s; Content: %s", F.trim(df.Summary), F.trim(df.Text)),\n)\n\ndisplay(df)\n')),(0,a.kt)("h2",{id:"step-5-generate-embeddings"},"Step 5: Generate Embeddings"),(0,a.kt)("p",null,"We will first generate embeddings for the reviews using the SynapseML OpenAIEmbedding client."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIEmbedding\n\nembedding = (\n OpenAIEmbedding()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name_embeddings)\n .setCustomServiceName(service_name)\n .setTextCol("combined")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\ncompleted_df = embedding.transform(df).cache()\ndisplay(completed_df)\n')),(0,a.kt)("h2",{id:"step-6-reduce-embedding-dimensionality-for-visualization"},"Step 6: Reduce Embedding dimensionality for Visualization"),(0,a.kt)("p",null,"We reduce the dimensionality to 2 dimensions using t-SNE decomposition."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import pandas as pd\nfrom sklearn.manifold import TSNE\nimport numpy as np\n\ncollected = list(completed_df.collect())\nmatrix = np.array([[r["embeddings"]] for r in collected])[:, 0, :].astype(np.float64)\nscores = np.array([[r["Score"]] for r in collected]).reshape(-1)\n\ntsne = TSNE(n_components=2, perplexity=15, random_state=42, init="pca")\nvis_dims = tsne.fit_transform(matrix)\nvis_dims.shape\n')),(0,a.kt)("h2",{id:"step-7-plot-the-embeddings"},"Step 7: Plot the embeddings"),(0,a.kt)("p",null,"We now use t-SNE to reduce the dimensionality of the embeddings from 1536 to 2. Once the embeddings are reduced to two dimensions, we can plot them in a 2D scatter plot. We colour each review by its star rating, ranging from red for negative reviews, to green for positive reviews. We can observe a decent data separation even in the reduced 2 dimensions."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import matplotlib.pyplot as plt\nimport matplotlib\nimport numpy as np\n\ncolors = ["red", "darkorange", "gold", "turquoise", "darkgreen"]\nx = [x for x, y in vis_dims]\ny = [y for x, y in vis_dims]\ncolor_indices = scores - 1\n\ncolormap = matplotlib.colors.ListedColormap(colors)\nplt.scatter(x, y, c=color_indices, cmap=colormap, alpha=0.3)\nfor score in [0, 1, 2, 3, 4]:\n avg_x = np.array(x)[scores - 1 == score].mean()\n avg_y = np.array(y)[scores - 1 == score].mean()\n color = colors[score]\n plt.scatter(avg_x, avg_y, marker="x", color=color, s=100)\n\nplt.title("Amazon ratings visualized in language using t-SNE")\n')),(0,a.kt)("h2",{id:"step-8-build-the-query-against-embeddings"},"Step 8: Build the query against embeddings"),(0,a.kt)("p",null,'Note: The data types of the ID columns in the document and query dataframes should be the same. For some OpenAI models, users should use separate models for embedding documents and queries. These models are denoted by the "-doc" and "-query" suffixes respectively.'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.ml import PipelineModel\n\nembedding_query = (\n OpenAIEmbedding()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name_embeddings)\n .setCustomServiceName(service_name)\n .setTextCol("query")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\nquery_df = (\n spark.createDataFrame(\n [\n (\n 0,\n "desserts",\n ),\n (\n 1,\n "disgusting",\n ),\n ]\n )\n .toDF("id", "query")\n .withColumn("id", F.col("id").cast("int"))\n)\n\nembedding_query_df = (\n embedding_query.transform(query_df).select("id", "embeddings").cache()\n)\n')),(0,a.kt)("h2",{id:"step-9-fit-knn-model"},"Step 9: Fit KNN model"),(0,a.kt)("p",null,"Build KNN model using fit method"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from spark_rapids_ml.knn import NearestNeighbors\n\nrapids_knn = NearestNeighbors(k=10)\nrapids_knn.setInputCol("embeddings").setIdCol("id")\n\nrapids_knn_model = rapids_knn.fit(completed_df.select("id", "embeddings"))\n')),(0,a.kt)("h2",{id:"step-10-retrieve-query-results"},"Step 10: Retrieve query results"),(0,a.kt)("p",null,"Find k nearest neighbors"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},"(_, _, knn_df) = rapids_knn_model.kneighbors(embedding_query_df)\n\ndisplay(knn_df)\n")))}m.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[17820],{3905:(e,t,n)=>{n.d(t,{Zo:()=>d,kt:()=>u});var r=n(67294);function a(e,t,n){return t in e?Object.defineProperty(e,t,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[t]=n,e}function o(e,t){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var r=Object.getOwnPropertySymbols(e);t&&(r=r.filter((function(t){return Object.getOwnPropertyDescriptor(e,t).enumerable}))),n.push.apply(n,r)}return n}function i(e){for(var t=1;t=0||(a[n]=e[n]);return a}(e,t);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(r=0;r=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(a[n]=e[n])}return a}var l=r.createContext({}),p=function(e){var t=r.useContext(l),n=t;return e&&(n="function"==typeof e?e(t):i(i({},t),e)),n},d=function(e){var t=p(e.components);return r.createElement(l.Provider,{value:t},e.children)},m={inlineCode:"code",wrapper:function(e){var t=e.children;return r.createElement(r.Fragment,{},t)}},c=r.forwardRef((function(e,t){var n=e.components,a=e.mdxType,o=e.originalType,l=e.parentName,d=s(e,["components","mdxType","originalType","parentName"]),c=p(n),u=a,g=c["".concat(l,".").concat(u)]||c[u]||m[u]||o;return n?r.createElement(g,i(i({ref:t},d),{},{components:n})):r.createElement(g,i({ref:t},d))}));function u(e,t){var n=arguments,a=t&&t.mdxType;if("string"==typeof e||a){var o=n.length,i=new Array(o);i[0]=c;var s={};for(var l in t)hasOwnProperty.call(t,l)&&(s[l]=t[l]);s.originalType=e,s.mdxType="string"==typeof e?e:a,i[1]=s;for(var p=2;p{n.r(t),n.d(t,{assets:()=>l,contentTitle:()=>i,default:()=>m,frontMatter:()=>o,metadata:()=>s,toc:()=>p});var r=n(83117),a=(n(67294),n(3905));const o={title:"Quickstart - OpenAI Embedding and GPU based KNN",hide_title:!0,status:"stable"},i="Embedding Text with Azure OpenAI and GPU based KNN",s={unversionedId:"Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN",id:"Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN",title:"Quickstart - OpenAI Embedding and GPU based KNN",description:'The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples we have integrated the Azure OpenAI service with the distributed machine learning library Spark Rapids ML. This integration makes it easy to use the Apache Spark distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models to generate embeddings for large datasets of text. This demo is based on "Quickstart - OpenAI Embedding" notebook with NVIDIA GPU accelerated KNN.',source:"@site/docs/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN.md",sourceDirName:"Explore Algorithms/OpenAI",slug:"/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding and GPU based KNN",draft:!1,tags:[],version:"current",frontMatter:{title:"Quickstart - OpenAI Embedding and GPU based KNN",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Quickstart - OpenAI Embedding",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - OpenAI Embedding"},next:{title:"Quickstart - Understand and Search Forms",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Quickstart - Understand and Search Forms"}},l={},p=[{value:"Step 1: Prerequisites",id:"step-1-prerequisites",level:2},{value:"Step 2: Import this guide as a notebook",id:"step-2-import-this-guide-as-a-notebook",level:2},{value:"Step 3: Fill in your service information",id:"step-3-fill-in-your-service-information",level:2},{value:"Step 4: Load Data",id:"step-4-load-data",level:2},{value:"Step 5: Generate Embeddings",id:"step-5-generate-embeddings",level:2},{value:"Step 6: Reduce Embedding dimensionality for Visualization",id:"step-6-reduce-embedding-dimensionality-for-visualization",level:2},{value:"Step 7: Plot the embeddings",id:"step-7-plot-the-embeddings",level:2},{value:"Step 8: Build the query against embeddings",id:"step-8-build-the-query-against-embeddings",level:2},{value:"Step 9: Fit KNN model",id:"step-9-fit-knn-model",level:2},{value:"Step 10: Retrieve query results",id:"step-10-retrieve-query-results",level:2}],d={toc:p};function m(e){let{components:t,...n}=e;return(0,a.kt)("wrapper",(0,r.Z)({},d,n,{components:t,mdxType:"MDXLayout"}),(0,a.kt)("h1",{id:"embedding-text-with-azure-openai-and-gpu-based-knn"},"Embedding Text with Azure OpenAI and GPU based KNN"),(0,a.kt)("p",null,"The Azure OpenAI service can be used to solve a large number of natural language tasks through prompting the completion API. To make it easier to scale your prompting workflows from a few examples to large datasets of examples we have integrated the Azure OpenAI service with the distributed machine learning library ",(0,a.kt)("a",{parentName:"p",href:"https://github.com/NVIDIA/spark-rapids-ml/"},"Spark Rapids ML"),". This integration makes it easy to use the ",(0,a.kt)("a",{parentName:"p",href:"https://spark.apache.org/"},"Apache Spark"),' distributed computing framework to process millions of prompts with the OpenAI service. This tutorial shows how to apply large language models to generate embeddings for large datasets of text. This demo is based on "Quickstart - OpenAI Embedding" notebook with NVIDIA GPU accelerated KNN.'),(0,a.kt)("p",null,(0,a.kt)("strong",{parentName:"p"},"Note"),": Running the notebook with the demo dataset (Step 4) will generate the same results as CPU based \u201cQuickstart - OpenAI Embedding\u201d notebook. To see GPU acceleration you need to run query against bigger embeddings.\nFor example, running 100K rows dataset will give 6x acceleration and consume less than 10x memory on 2 nodes NVIDIA T4 cluster compare to AMD Epic (Rome) 2 nodes CPU cluster."),(0,a.kt)("h2",{id:"step-1-prerequisites"},"Step 1: Prerequisites"),(0,a.kt)("p",null,"The key prerequisites for this quickstart include a working Azure OpenAI resource, and an Apache Spark cluster with SynapseML installed. We suggest creating a Synapse workspace, but an Azure Databricks, HDInsight, or Spark on Kubernetes, or even a python environment with the ",(0,a.kt)("inlineCode",{parentName:"p"},"pyspark")," package will work. "),(0,a.kt)("ol",null,(0,a.kt)("li",{parentName:"ol"},"An Azure OpenAI resource \u2013 request access ",(0,a.kt)("a",{parentName:"li",href:"https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu"},"here")," before ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource"},"creating a resource")),(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-create-workspace"},"Create a Synapse workspace")),(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-analyze-spark#create-a-serverless-apache-spark-pool"},"Create a serverless Apache Spark pool"))),(0,a.kt)("h2",{id:"step-2-import-this-guide-as-a-notebook"},"Step 2: Import this guide as a notebook"),(0,a.kt)("p",null,"The next step is to add this code into your Spark cluster. You can either create a notebook in your Spark platform and copy the code into this notebook to run the demo. Or download the notebook and import it into Synapse Analytics"),(0,a.kt)("ol",null,(0,a.kt)("li",{parentName:"ol"},(0,a.kt)("a",{parentName:"li",href:"https://github.com/microsoft/SynapseML/blob/master/notebooks/features/cognitive_services/CognitiveServices%20-%20OpenAI%20Embedding.ipynb"},"Download this demo as a notebook")," (click Raw, then save the file)"),(0,a.kt)("li",{parentName:"ol"},"Import the notebook ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-development-using-notebooks#create-a-notebook"},"into the Synapse Workspace")," or if using Databricks ",(0,a.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/databricks/notebooks/notebooks-manage#create-a-notebook"},"into the Databricks Workspace")),(0,a.kt)("li",{parentName:"ol"},"Install SynapseML on your cluster. Please see the installation instructions for Synapse at the bottom of ",(0,a.kt)("a",{parentName:"li",href:"https://microsoft.github.io/SynapseML/"},"the SynapseML website"),". Note that this requires pasting an additional cell at the top of the notebook you just imported"),(0,a.kt)("li",{parentName:"ol"},"To run the notebook on Databricks add related init script (/tools/init_scripts/init-rapidsml-cuda-11.8.sh)"),(0,a.kt)("li",{parentName:"ol"},"Connect your notebook to a cluster and follow along, editing and rnnung the cells below.")),(0,a.kt)("h2",{id:"step-3-fill-in-your-service-information"},"Step 3: Fill in your service information"),(0,a.kt)("p",null,"Next, please edit the cell in the notebook to point to your service. In particular set the ",(0,a.kt)("inlineCode",{parentName:"p"},"service_name"),", ",(0,a.kt)("inlineCode",{parentName:"p"},"deployment_name"),", ",(0,a.kt)("inlineCode",{parentName:"p"},"location"),", and ",(0,a.kt)("inlineCode",{parentName:"p"},"key")," variables to match those for your OpenAI service"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.core.platform import find_secret\n\n# Fill in the following lines with your service information\n# Learn more about selecting which embedding model to choose: https://openai.com/blog/new-and-improved-embedding-model\nservice_name = "synapseml-openai-2"\ndeployment_name_embeddings = "text-embedding-ada-002"\n\nkey = find_secret(\n secret_name="openai-api-key-2", keyvault="mmlspark-build-keys"\n) # please replace this with your key as a string\n\nassert key is not None and service_name is not None\n')),(0,a.kt)("h2",{id:"step-4-load-data"},"Step 4: Load Data"),(0,a.kt)("p",null,"In this demo we will explore a dataset of fine food reviews"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import pyspark.sql.functions as F\n\ndf = (\n spark.read.options(inferSchema="True", delimiter=",", header=True)\n .csv("wasbs://publicwasb@mmlspark.blob.core.windows.net/fine_food_reviews_1k.csv")\n .repartition(5)\n)\n\ndf = df.withColumn(\n "combined",\n F.format_string("Title: %s; Content: %s", F.trim(df.Summary), F.trim(df.Text)),\n)\n\ndisplay(df)\n')),(0,a.kt)("h2",{id:"step-5-generate-embeddings"},"Step 5: Generate Embeddings"),(0,a.kt)("p",null,"We will first generate embeddings for the reviews using the SynapseML OpenAIEmbedding client."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from synapse.ml.services.openai import OpenAIEmbedding\n\nembedding = (\n OpenAIEmbedding()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name_embeddings)\n .setCustomServiceName(service_name)\n .setTextCol("combined")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\ncompleted_df = embedding.transform(df).cache()\ndisplay(completed_df)\n')),(0,a.kt)("h2",{id:"step-6-reduce-embedding-dimensionality-for-visualization"},"Step 6: Reduce Embedding dimensionality for Visualization"),(0,a.kt)("p",null,"We reduce the dimensionality to 2 dimensions using t-SNE decomposition."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import pandas as pd\nfrom sklearn.manifold import TSNE\nimport numpy as np\n\ncollected = list(completed_df.collect())\nmatrix = np.array([[r["embeddings"]] for r in collected])[:, 0, :].astype(np.float64)\nscores = np.array([[r["Score"]] for r in collected]).reshape(-1)\n\ntsne = TSNE(n_components=2, perplexity=15, random_state=42, init="pca")\nvis_dims = tsne.fit_transform(matrix)\nvis_dims.shape\n')),(0,a.kt)("h2",{id:"step-7-plot-the-embeddings"},"Step 7: Plot the embeddings"),(0,a.kt)("p",null,"We now use t-SNE to reduce the dimensionality of the embeddings from 1536 to 2. Once the embeddings are reduced to two dimensions, we can plot them in a 2D scatter plot. We colour each review by its star rating, ranging from red for negative reviews, to green for positive reviews. We can observe a decent data separation even in the reduced 2 dimensions."),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'import matplotlib.pyplot as plt\nimport matplotlib\nimport numpy as np\n\ncolors = ["red", "darkorange", "gold", "turquoise", "darkgreen"]\nx = [x for x, y in vis_dims]\ny = [y for x, y in vis_dims]\ncolor_indices = scores - 1\n\ncolormap = matplotlib.colors.ListedColormap(colors)\nplt.scatter(x, y, c=color_indices, cmap=colormap, alpha=0.3)\nfor score in [0, 1, 2, 3, 4]:\n avg_x = np.array(x)[scores - 1 == score].mean()\n avg_y = np.array(y)[scores - 1 == score].mean()\n color = colors[score]\n plt.scatter(avg_x, avg_y, marker="x", color=color, s=100)\n\nplt.title("Amazon ratings visualized in language using t-SNE")\n')),(0,a.kt)("h2",{id:"step-8-build-the-query-against-embeddings"},"Step 8: Build the query against embeddings"),(0,a.kt)("p",null,'Note: The data types of the ID columns in the document and query dataframes should be the same. For some OpenAI models, users should use separate models for embedding documents and queries. These models are denoted by the "-doc" and "-query" suffixes respectively.'),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from pyspark.ml import PipelineModel\n\nembedding_query = (\n OpenAIEmbedding()\n .setSubscriptionKey(key)\n .setDeploymentName(deployment_name_embeddings)\n .setCustomServiceName(service_name)\n .setTextCol("query")\n .setErrorCol("error")\n .setOutputCol("embeddings")\n)\n\nquery_df = (\n spark.createDataFrame(\n [\n (\n 0,\n "desserts",\n ),\n (\n 1,\n "disgusting",\n ),\n ]\n )\n .toDF("id", "query")\n .withColumn("id", F.col("id").cast("int"))\n)\n\nembedding_query_df = (\n embedding_query.transform(query_df).select("id", "embeddings").cache()\n)\n')),(0,a.kt)("h2",{id:"step-9-fit-knn-model"},"Step 9: Fit KNN model"),(0,a.kt)("p",null,"Build KNN model using fit method"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},'from spark_rapids_ml.knn import NearestNeighbors\n\nrapids_knn = NearestNeighbors(k=10)\nrapids_knn.setInputCol("embeddings").setIdCol("id")\n\nrapids_knn_model = rapids_knn.fit(completed_df.select("id", "embeddings"))\n')),(0,a.kt)("h2",{id:"step-10-retrieve-query-results"},"Step 10: Retrieve query results"),(0,a.kt)("p",null,"Find k nearest neighbors"),(0,a.kt)("pre",null,(0,a.kt)("code",{parentName:"pre",className:"language-python"},"(_, _, knn_df) = rapids_knn_model.kneighbors(embedding_query_df)\n\ndisplay(knn_df)\n")))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/ed6d544d.cdc541ce.js b/assets/js/ed6d544d.9766b089.js similarity index 59% rename from assets/js/ed6d544d.cdc541ce.js rename to assets/js/ed6d544d.9766b089.js index ddcafc176e..6091cf8a14 100644 --- a/assets/js/ed6d544d.cdc541ce.js +++ b/assets/js/ed6d544d.9766b089.js @@ -1 +1 @@ -"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[63448],{3905:(e,a,t)=>{t.d(a,{Zo:()=>c,kt:()=>u});var n=t(67294);function r(e,a,t){return a in e?Object.defineProperty(e,a,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[a]=t,e}function i(e,a){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);a&&(n=n.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),t.push.apply(t,n)}return t}function o(e){for(var a=1;a=0||(r[t]=e[t]);return r}(e,a);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var p=n.createContext({}),l=function(e){var a=n.useContext(p),t=a;return e&&(t="function"==typeof e?e(a):o(o({},a),e)),t},c=function(e){var a=l(e.components);return n.createElement(p.Provider,{value:a},e.children)},m={inlineCode:"code",wrapper:function(e){var a=e.children;return n.createElement(n.Fragment,{},a)}},h=n.forwardRef((function(e,a){var t=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),h=l(t),u=r,d=h["".concat(p,".").concat(u)]||h[u]||m[u]||i;return t?n.createElement(d,o(o({ref:a},c),{},{components:t})):n.createElement(d,o({ref:a},c))}));function u(e,a){var t=arguments,r=a&&a.mdxType;if("string"==typeof e||r){var i=t.length,o=new Array(i);o[0]=h;var s={};for(var p in a)hasOwnProperty.call(a,p)&&(s[p]=a[p]);s.originalType=e,s.mdxType="string"==typeof e?e:r,o[1]=s;for(var l=2;l{t.r(a),t.d(a,{assets:()=>p,contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>s,toc:()=>l});var n=t(83117),r=(t(67294),t(3905));const i={title:"Langchain",hide_title:!0,status:"stable"},o="Using the LangChain Transformer",s={unversionedId:"Explore Algorithms/OpenAI/Langchain",id:"Explore Algorithms/OpenAI/Langchain",title:"Langchain",description:"LangChain is a software development framework designed to simplify the creation of applications using large language models (LLMs). Chains in LangChain go beyond just a single LLM call and are sequences of calls (can be a call to an LLM or a different utility), automating the execution of a series of calls and actions.",source:"@site/docs/Explore Algorithms/OpenAI/Langchain.md",sourceDirName:"Explore Algorithms/OpenAI",slug:"/Explore Algorithms/OpenAI/Langchain",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Langchain",draft:!1,tags:[],version:"current",frontMatter:{title:"Langchain",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Quickstart - Predictive Maintenance",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Predictive Maintenance"},next:{title:"OpenAI",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/"}},p={},l=[{value:"Step 1: Prerequisites",id:"step-1-prerequisites",level:2},{value:"Step 2: Import this guide as a notebook",id:"step-2-import-this-guide-as-a-notebook",level:2},{value:"Step 3: Fill in the service information and construct the LLM",id:"step-3-fill-in-the-service-information-and-construct-the-llm",level:2},{value:"Step 4: Basic Usage of LangChain Transformer",id:"step-4-basic-usage-of-langchain-transformer",level:2},{value:"Create a chain",id:"create-a-chain",level:3},{value:"Create a dataset and apply the chain",id:"create-a-dataset-and-apply-the-chain",level:3},{value:"Save and load the LangChain transformer",id:"save-and-load-the-langchain-transformer",level:3},{value:"Step 5: Using LangChain for Large scale literature review",id:"step-5-using-langchain-for-large-scale-literature-review",level:2},{value:"Create a Sequential Chain for paper summarization",id:"create-a-sequential-chain-for-paper-summarization",level:3},{value:"Apply the LangChain transformer to perform this workload at scale",id:"apply-the-langchain-transformer-to-perform-this-workload-at-scale",level:3}],c={toc:l};function m(e){let{components:a,...t}=e;return(0,r.kt)("wrapper",(0,n.Z)({},c,t,{components:a,mdxType:"MDXLayout"}),(0,r.kt)("h1",{id:"using-the-langchain-transformer"},"Using the LangChain Transformer"),(0,r.kt)("p",null,"LangChain is a software development framework designed to simplify the creation of applications using large language models (LLMs). Chains in LangChain go beyond just a single LLM call and are sequences of calls (can be a call to an LLM or a different utility), automating the execution of a series of calls and actions.\nTo make it easier to scale up the LangChain execution on a large dataset, we have integrated LangChain with the distributed machine learning library ",(0,r.kt)("a",{parentName:"p",href:"https://www.microsoft.com/en-us/research/blog/synapseml-a-simple-multilingual-and-massively-parallel-machine-learning-library/"},"SynapseML"),". This integration makes it easy to use the ",(0,r.kt)("a",{parentName:"p",href:"https://spark.apache.org/"},"Apache Spark")," distributed computing framework to process millions of data with the LangChain Framework."),(0,r.kt)("p",null,"This tutorial shows how to apply LangChain at scale for paper summarization and organization. We start with a table of arxiv links and apply the LangChain Transformerto automatically extract the corresponding paper title, authors, summary, and some related works."),(0,r.kt)("h2",{id:"step-1-prerequisites"},"Step 1: Prerequisites"),(0,r.kt)("p",null,"The key prerequisites for this quickstart include a working Azure OpenAI resource, and an Apache Spark cluster with SynapseML installed. We suggest creating a Synapse workspace, but an Azure Databricks, HDInsight, or Spark on Kubernetes, or even a python environment with the ",(0,r.kt)("inlineCode",{parentName:"p"},"pyspark")," package will work. "),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"An Azure OpenAI resource \u2013 request access ",(0,r.kt)("a",{parentName:"li",href:"https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu"},"here")," before ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource"},"creating a resource")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-create-workspace"},"Create a Synapse workspace")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-analyze-spark#create-a-serverless-apache-spark-pool"},"Create a serverless Apache Spark pool"))),(0,r.kt)("h2",{id:"step-2-import-this-guide-as-a-notebook"},"Step 2: Import this guide as a notebook"),(0,r.kt)("p",null,"The next step is to add this code into your Spark cluster. You can either create a notebook in your Spark platform and copy the code into this notebook to run the demo. Or download the notebook and import it into Synapse Analytics"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"Import the notebook into ",(0,r.kt)("a",{parentName:"li",href:"https://learn.microsoft.com/en-us/fabric/data-engineering/how-to-use-notebook"},"Microsoft Fabric"),", ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-development-using-notebooks#create-a-notebook"},"Synapse Workspace")," or if using Databricks into the ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/databricks/notebooks/notebooks-manage#create-a-notebook"},"Databricks Workspace"),"."),(0,r.kt)("li",{parentName:"ol"},"Install SynapseML on your cluster. Please see the installation instructions for Synapse at the bottom of ",(0,r.kt)("a",{parentName:"li",href:"https://microsoft.github.io/SynapseML/"},"the SynapseML website"),". Note that this requires pasting an additional cell at the top of the notebook you just imported."),(0,r.kt)("li",{parentName:"ol"},"Connect your notebook to a cluster and follow along, editing and running the cells below.")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},"%pip install openai==0.28.1 langchain==0.0.331 pdf2image pdfminer.six unstructured==0.10.24 pytesseract numpy==1.22.4\n")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},"import os, openai, langchain, uuid\nfrom langchain.llms import AzureOpenAI, OpenAI\nfrom langchain.agents import load_tools, initialize_agent, AgentType\nfrom langchain.chains import TransformChain, LLMChain, SimpleSequentialChain\nfrom langchain.document_loaders import OnlinePDFLoader\nfrom langchain.tools.bing_search.tool import BingSearchRun, BingSearchAPIWrapper\nfrom langchain.prompts import PromptTemplate\nfrom synapse.ml.services.langchain import LangchainTransformer\nfrom synapse.ml.core.platform import running_on_synapse, find_secret\n")),(0,r.kt)("h2",{id:"step-3-fill-in-the-service-information-and-construct-the-llm"},"Step 3: Fill in the service information and construct the LLM"),(0,r.kt)("p",null,"Next, please edit the cell in the notebook to point to your service. In particular set the ",(0,r.kt)("inlineCode",{parentName:"p"},"model_name"),", ",(0,r.kt)("inlineCode",{parentName:"p"},"deployment_name"),", ",(0,r.kt)("inlineCode",{parentName:"p"},"openai_api_base"),", and ",(0,r.kt)("inlineCode",{parentName:"p"},"open_api_key")," variables to match those for your OpenAI service. Please feel free to replace ",(0,r.kt)("inlineCode",{parentName:"p"},"find_secret")," with your key as follows"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},'openai_api_key = "99sj2w82o...."')),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},'bing_subscription_key = "..."')),(0,r.kt)("p",null,"Note that you also need to set up your Bing search to gain access to your ",(0,r.kt)("a",{parentName:"p",href:"https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource"},"Bing Search subscription key"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'openai_api_key = find_secret(\n secret_name="openai-api-key", keyvault="mmlspark-build-keys"\n)\nopenai_api_base = "https://synapseml-openai.openai.azure.com/"\nopenai_api_version = "2022-12-01"\nopenai_api_type = "azure"\ndeployment_name = "text-davinci-003"\nbing_search_url = "https://api.bing.microsoft.com/v7.0/search"\nbing_subscription_key = find_secret(\n secret_name="bing-search-key", keyvault="mmlspark-build-keys"\n)\n\nos.environ["BING_SUBSCRIPTION_KEY"] = bing_subscription_key\nos.environ["BING_SEARCH_URL"] = bing_search_url\nos.environ["OPENAI_API_TYPE"] = openai_api_type\nos.environ["OPENAI_API_VERSION"] = openai_api_version\nos.environ["OPENAI_API_BASE"] = openai_api_base\nos.environ["OPENAI_API_KEY"] = openai_api_key\n\nllm = AzureOpenAI(\n deployment_name=deployment_name,\n model_name=deployment_name,\n temperature=0.1,\n verbose=True,\n)\n')),(0,r.kt)("h2",{id:"step-4-basic-usage-of-langchain-transformer"},"Step 4: Basic Usage of LangChain Transformer"),(0,r.kt)("h3",{id:"create-a-chain"},"Create a chain"),(0,r.kt)("p",null,"We will start by demonstrating the basic usage with a simple chain that creates definitions for input words"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'copy_prompt = PromptTemplate(\n input_variables=["technology"],\n template="Define the following word: {technology}",\n)\n\nchain = LLMChain(llm=llm, prompt=copy_prompt)\ntransformer = (\n LangchainTransformer()\n .setInputCol("technology")\n .setOutputCol("definition")\n .setChain(chain)\n .setSubscriptionKey(openai_api_key)\n .setUrl(openai_api_base)\n)\n')),(0,r.kt)("h3",{id:"create-a-dataset-and-apply-the-chain"},"Create a dataset and apply the chain"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'# construction of test dataframe\ndf = spark.createDataFrame(\n [(0, "docker"), (1, "spark"), (2, "python")], ["label", "technology"]\n)\ndisplay(transformer.transform(df))\n')),(0,r.kt)("h3",{id:"save-and-load-the-langchain-transformer"},"Save and load the LangChain transformer"),(0,r.kt)("p",null,"LangChain Transformers can be saved and loaded. Note that LangChain serialization only works for chains that don't have memory."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'temp_dir = "tmp"\nif not os.path.exists(temp_dir):\n os.mkdir(temp_dir)\npath = os.path.join(temp_dir, "langchainTransformer")\ntransformer.save(path)\nloaded = LangchainTransformer.load(path)\ndisplay(loaded.transform(df))\n')),(0,r.kt)("h2",{id:"step-5-using-langchain-for-large-scale-literature-review"},"Step 5: Using LangChain for Large scale literature review"),(0,r.kt)("h3",{id:"create-a-sequential-chain-for-paper-summarization"},"Create a Sequential Chain for paper summarization"),(0,r.kt)("p",null,"We will now construct a Sequential Chain for extracting structured information from an arxiv link. In particular, we will ask langchain to extract the title, author information, and a summary of the paper content. After that, we use a web search tool to find the recent papers written by the first author."),(0,r.kt)("p",null,"To summarize, our sequential chain contains the following steps:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("strong",{parentName:"li"},"Transform Chain"),": Extract Paper Content from arxiv Link ",(0,r.kt)("strong",{parentName:"li"},"=>")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("strong",{parentName:"li"},"LLMChain"),": Summarize the Paper, extract paper title and authors ",(0,r.kt)("strong",{parentName:"li"},"=>")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("strong",{parentName:"li"},"Transform Chain"),": to generate the prompt ",(0,r.kt)("strong",{parentName:"li"},"=>")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("strong",{parentName:"li"},"Agent with Web Search Tool"),": Use Web Search to find the recent papers by the first author")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'def paper_content_extraction(inputs: dict) -> dict:\n arxiv_link = inputs["arxiv_link"]\n loader = OnlinePDFLoader(arxiv_link)\n pages = loader.load_and_split()\n return {"paper_content": pages[0].page_content + pages[1].page_content}\n\n\ndef prompt_generation(inputs: dict) -> dict:\n output = inputs["Output"]\n prompt = (\n "find the paper title, author, summary in the paper description below, output them. After that, Use websearch to find out 3 recent papers of the first author in the author section below (first author is the first name separated by comma) and list the paper titles in bullet points: \\n"\n + output\n + "."\n )\n return {"prompt": prompt}\n\n\npaper_content_extraction_chain = TransformChain(\n input_variables=["arxiv_link"],\n output_variables=["paper_content"],\n transform=paper_content_extraction,\n verbose=False,\n)\n\npaper_summarizer_template = """You are a paper summarizer, given the paper content, it is your job to summarize the paper into a short summary, and extract authors and paper title from the paper content.\nHere is the paper content:\n{paper_content}\nOutput:\npaper title, authors and summary.\n"""\nprompt = PromptTemplate(\n input_variables=["paper_content"], template=paper_summarizer_template\n)\nsummarize_chain = LLMChain(llm=llm, prompt=prompt, verbose=False)\n\nprompt_generation_chain = TransformChain(\n input_variables=["Output"],\n output_variables=["prompt"],\n transform=prompt_generation,\n verbose=False,\n)\n\nbing = BingSearchAPIWrapper(k=3)\ntools = [BingSearchRun(api_wrapper=bing)]\nweb_search_agent = initialize_agent(\n tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False\n)\n\nsequential_chain = SimpleSequentialChain(\n chains=[\n paper_content_extraction_chain,\n summarize_chain,\n prompt_generation_chain,\n web_search_agent,\n ]\n)\n')),(0,r.kt)("h3",{id:"apply-the-langchain-transformer-to-perform-this-workload-at-scale"},"Apply the LangChain transformer to perform this workload at scale"),(0,r.kt)("p",null,"We can now use our chain at scale using the ",(0,r.kt)("inlineCode",{parentName:"p"},"LangchainTransformer")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'paper_df = spark.createDataFrame(\n [\n (0, "https://arxiv.org/pdf/2107.13586.pdf"),\n (1, "https://arxiv.org/pdf/2101.00190.pdf"),\n (2, "https://arxiv.org/pdf/2103.10385.pdf"),\n (3, "https://arxiv.org/pdf/2110.07602.pdf"),\n ],\n ["label", "arxiv_link"],\n)\n\n# construct langchain transformer using the paper summarizer chain define above\npaper_info_extractor = (\n LangchainTransformer()\n .setInputCol("arxiv_link")\n .setOutputCol("paper_info")\n .setChain(sequential_chain)\n .setSubscriptionKey(openai_api_key)\n .setUrl(openai_api_base)\n)\n\n\n# extract paper information from arxiv links, the paper information needs to include:\n# paper title, paper authors, brief paper summary, and recent papers published by the first author\ndisplay(paper_info_extractor.transform(paper_df))\n')))}m.isMDXComponent=!0}}]); \ No newline at end of file +"use strict";(self.webpackChunksynapseml=self.webpackChunksynapseml||[]).push([[63448],{3905:(e,a,t)=>{t.d(a,{Zo:()=>c,kt:()=>u});var n=t(67294);function r(e,a,t){return a in e?Object.defineProperty(e,a,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[a]=t,e}function i(e,a){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var n=Object.getOwnPropertySymbols(e);a&&(n=n.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),t.push.apply(t,n)}return t}function o(e){for(var a=1;a=0||(r[t]=e[t]);return r}(e,a);if(Object.getOwnPropertySymbols){var i=Object.getOwnPropertySymbols(e);for(n=0;n=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var p=n.createContext({}),l=function(e){var a=n.useContext(p),t=a;return e&&(t="function"==typeof e?e(a):o(o({},a),e)),t},c=function(e){var a=l(e.components);return n.createElement(p.Provider,{value:a},e.children)},m={inlineCode:"code",wrapper:function(e){var a=e.children;return n.createElement(n.Fragment,{},a)}},h=n.forwardRef((function(e,a){var t=e.components,r=e.mdxType,i=e.originalType,p=e.parentName,c=s(e,["components","mdxType","originalType","parentName"]),h=l(t),u=r,d=h["".concat(p,".").concat(u)]||h[u]||m[u]||i;return t?n.createElement(d,o(o({ref:a},c),{},{components:t})):n.createElement(d,o({ref:a},c))}));function u(e,a){var t=arguments,r=a&&a.mdxType;if("string"==typeof e||r){var i=t.length,o=new Array(i);o[0]=h;var s={};for(var p in a)hasOwnProperty.call(a,p)&&(s[p]=a[p]);s.originalType=e,s.mdxType="string"==typeof e?e:r,o[1]=s;for(var l=2;l{t.r(a),t.d(a,{assets:()=>p,contentTitle:()=>o,default:()=>m,frontMatter:()=>i,metadata:()=>s,toc:()=>l});var n=t(83117),r=(t(67294),t(3905));const i={title:"Langchain",hide_title:!0,status:"stable"},o="Using the LangChain Transformer",s={unversionedId:"Explore Algorithms/OpenAI/Langchain",id:"Explore Algorithms/OpenAI/Langchain",title:"Langchain",description:"LangChain is a software development framework designed to simplify the creation of applications using large language models (LLMs). Chains in LangChain go beyond just a single LLM call and are sequences of calls (can be a call to an LLM or a different utility), automating the execution of a series of calls and actions.",source:"@site/docs/Explore Algorithms/OpenAI/Langchain.md",sourceDirName:"Explore Algorithms/OpenAI",slug:"/Explore Algorithms/OpenAI/Langchain",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/Langchain",draft:!1,tags:[],version:"current",frontMatter:{title:"Langchain",hide_title:!0,status:"stable"},sidebar:"docs",previous:{title:"Quickstart - Predictive Maintenance",permalink:"/SynapseML/docs/next/Explore Algorithms/AI Services/Quickstart - Predictive Maintenance"},next:{title:"OpenAI",permalink:"/SynapseML/docs/next/Explore Algorithms/OpenAI/"}},p={},l=[{value:"Step 1: Prerequisites",id:"step-1-prerequisites",level:2},{value:"Step 2: Import this guide as a notebook",id:"step-2-import-this-guide-as-a-notebook",level:2},{value:"Step 3: Fill in the service information and construct the LLM",id:"step-3-fill-in-the-service-information-and-construct-the-llm",level:2},{value:"Step 4: Basic Usage of LangChain Transformer",id:"step-4-basic-usage-of-langchain-transformer",level:2},{value:"Create a chain",id:"create-a-chain",level:3},{value:"Create a dataset and apply the chain",id:"create-a-dataset-and-apply-the-chain",level:3},{value:"Save and load the LangChain transformer",id:"save-and-load-the-langchain-transformer",level:3},{value:"Step 5: Using LangChain for Large scale literature review",id:"step-5-using-langchain-for-large-scale-literature-review",level:2},{value:"Create a Sequential Chain for paper summarization",id:"create-a-sequential-chain-for-paper-summarization",level:3},{value:"Apply the LangChain transformer to perform this workload at scale",id:"apply-the-langchain-transformer-to-perform-this-workload-at-scale",level:3}],c={toc:l};function m(e){let{components:a,...t}=e;return(0,r.kt)("wrapper",(0,n.Z)({},c,t,{components:a,mdxType:"MDXLayout"}),(0,r.kt)("h1",{id:"using-the-langchain-transformer"},"Using the LangChain Transformer"),(0,r.kt)("p",null,"LangChain is a software development framework designed to simplify the creation of applications using large language models (LLMs). Chains in LangChain go beyond just a single LLM call and are sequences of calls (can be a call to an LLM or a different utility), automating the execution of a series of calls and actions.\nTo make it easier to scale up the LangChain execution on a large dataset, we have integrated LangChain with the distributed machine learning library ",(0,r.kt)("a",{parentName:"p",href:"https://www.microsoft.com/en-us/research/blog/synapseml-a-simple-multilingual-and-massively-parallel-machine-learning-library/"},"SynapseML"),". This integration makes it easy to use the ",(0,r.kt)("a",{parentName:"p",href:"https://spark.apache.org/"},"Apache Spark")," distributed computing framework to process millions of data with the LangChain Framework."),(0,r.kt)("p",null,"This tutorial shows how to apply LangChain at scale for paper summarization and organization. We start with a table of arxiv links and apply the LangChain Transformerto automatically extract the corresponding paper title, authors, summary, and some related works."),(0,r.kt)("h2",{id:"step-1-prerequisites"},"Step 1: Prerequisites"),(0,r.kt)("p",null,"The key prerequisites for this quickstart include a working Azure OpenAI resource, and an Apache Spark cluster with SynapseML installed. We suggest creating a Synapse workspace, but an Azure Databricks, HDInsight, or Spark on Kubernetes, or even a python environment with the ",(0,r.kt)("inlineCode",{parentName:"p"},"pyspark")," package will work. "),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"An Azure OpenAI resource \u2013 request access ",(0,r.kt)("a",{parentName:"li",href:"https://customervoice.microsoft.com/Pages/ResponsePage.aspx?id=v4j5cvGGr0GRqy180BHbR7en2Ais5pxKtso_Pz4b1_xUOFA5Qk1UWDRBMjg0WFhPMkIzTzhKQ1dWNyQlQCN0PWcu"},"here")," before ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource"},"creating a resource")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-create-workspace"},"Create a Synapse workspace")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/get-started-analyze-spark#create-a-serverless-apache-spark-pool"},"Create a serverless Apache Spark pool"))),(0,r.kt)("h2",{id:"step-2-import-this-guide-as-a-notebook"},"Step 2: Import this guide as a notebook"),(0,r.kt)("p",null,"The next step is to add this code into your Spark cluster. You can either create a notebook in your Spark platform and copy the code into this notebook to run the demo. Or download the notebook and import it into Synapse Analytics"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},"Import the notebook into ",(0,r.kt)("a",{parentName:"li",href:"https://learn.microsoft.com/en-us/fabric/data-engineering/how-to-use-notebook"},"Microsoft Fabric"),", ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-development-using-notebooks#create-a-notebook"},"Synapse Workspace")," or if using Databricks into the ",(0,r.kt)("a",{parentName:"li",href:"https://docs.microsoft.com/en-us/azure/databricks/notebooks/notebooks-manage#create-a-notebook"},"Databricks Workspace"),"."),(0,r.kt)("li",{parentName:"ol"},"Install SynapseML on your cluster. Please see the installation instructions for Synapse at the bottom of ",(0,r.kt)("a",{parentName:"li",href:"https://microsoft.github.io/SynapseML/"},"the SynapseML website"),". Note that this requires pasting an additional cell at the top of the notebook you just imported."),(0,r.kt)("li",{parentName:"ol"},"Connect your notebook to a cluster and follow along, editing and running the cells below.")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},"%pip install openai==0.28.1 langchain==0.0.331 pdf2image pdfminer.six unstructured==0.10.24 pytesseract numpy==1.22.4\n")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},"import os, openai, langchain, uuid\nfrom langchain.llms import AzureOpenAI, OpenAI\nfrom langchain.agents import load_tools, initialize_agent, AgentType\nfrom langchain.chains import TransformChain, LLMChain, SimpleSequentialChain\nfrom langchain.document_loaders import OnlinePDFLoader\nfrom langchain.tools.bing_search.tool import BingSearchRun, BingSearchAPIWrapper\nfrom langchain.prompts import PromptTemplate\nfrom synapse.ml.services.langchain import LangchainTransformer\nfrom synapse.ml.core.platform import running_on_synapse, find_secret\n")),(0,r.kt)("h2",{id:"step-3-fill-in-the-service-information-and-construct-the-llm"},"Step 3: Fill in the service information and construct the LLM"),(0,r.kt)("p",null,"Next, please edit the cell in the notebook to point to your service. In particular set the ",(0,r.kt)("inlineCode",{parentName:"p"},"model_name"),", ",(0,r.kt)("inlineCode",{parentName:"p"},"deployment_name"),", ",(0,r.kt)("inlineCode",{parentName:"p"},"openai_api_base"),", and ",(0,r.kt)("inlineCode",{parentName:"p"},"open_api_key")," variables to match those for your OpenAI service. Please feel free to replace ",(0,r.kt)("inlineCode",{parentName:"p"},"find_secret")," with your key as follows"),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},'openai_api_key = "99sj2w82o...."')),(0,r.kt)("p",null,(0,r.kt)("inlineCode",{parentName:"p"},'bing_subscription_key = "..."')),(0,r.kt)("p",null,"Note that you also need to set up your Bing search to gain access to your ",(0,r.kt)("a",{parentName:"p",href:"https://learn.microsoft.com/en-us/bing/search-apis/bing-web-search/create-bing-search-service-resource"},"Bing Search subscription key"),"."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'openai_api_key = find_secret(\n secret_name="openai-api-key-2", keyvault="mmlspark-build-keys"\n)\nopenai_api_base = "https://synapseml-openai-2.openai.azure.com/"\nopenai_api_version = "2022-12-01"\nopenai_api_type = "azure"\ndeployment_name = "text-davinci-003"\nbing_search_url = "https://api.bing.microsoft.com/v7.0/search"\nbing_subscription_key = find_secret(\n secret_name="bing-search-key", keyvault="mmlspark-build-keys"\n)\n\nos.environ["BING_SUBSCRIPTION_KEY"] = bing_subscription_key\nos.environ["BING_SEARCH_URL"] = bing_search_url\nos.environ["OPENAI_API_TYPE"] = openai_api_type\nos.environ["OPENAI_API_VERSION"] = openai_api_version\nos.environ["OPENAI_API_BASE"] = openai_api_base\nos.environ["OPENAI_API_KEY"] = openai_api_key\n\nllm = AzureOpenAI(\n deployment_name=deployment_name,\n model_name=deployment_name,\n temperature=0.1,\n verbose=True,\n)\n')),(0,r.kt)("h2",{id:"step-4-basic-usage-of-langchain-transformer"},"Step 4: Basic Usage of LangChain Transformer"),(0,r.kt)("h3",{id:"create-a-chain"},"Create a chain"),(0,r.kt)("p",null,"We will start by demonstrating the basic usage with a simple chain that creates definitions for input words"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'copy_prompt = PromptTemplate(\n input_variables=["technology"],\n template="Define the following word: {technology}",\n)\n\nchain = LLMChain(llm=llm, prompt=copy_prompt)\ntransformer = (\n LangchainTransformer()\n .setInputCol("technology")\n .setOutputCol("definition")\n .setChain(chain)\n .setSubscriptionKey(openai_api_key)\n .setUrl(openai_api_base)\n)\n')),(0,r.kt)("h3",{id:"create-a-dataset-and-apply-the-chain"},"Create a dataset and apply the chain"),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'# construction of test dataframe\ndf = spark.createDataFrame(\n [(0, "docker"), (1, "spark"), (2, "python")], ["label", "technology"]\n)\ndisplay(transformer.transform(df))\n')),(0,r.kt)("h3",{id:"save-and-load-the-langchain-transformer"},"Save and load the LangChain transformer"),(0,r.kt)("p",null,"LangChain Transformers can be saved and loaded. Note that LangChain serialization only works for chains that don't have memory."),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'temp_dir = "tmp"\nif not os.path.exists(temp_dir):\n os.mkdir(temp_dir)\npath = os.path.join(temp_dir, "langchainTransformer")\ntransformer.save(path)\nloaded = LangchainTransformer.load(path)\ndisplay(loaded.transform(df))\n')),(0,r.kt)("h2",{id:"step-5-using-langchain-for-large-scale-literature-review"},"Step 5: Using LangChain for Large scale literature review"),(0,r.kt)("h3",{id:"create-a-sequential-chain-for-paper-summarization"},"Create a Sequential Chain for paper summarization"),(0,r.kt)("p",null,"We will now construct a Sequential Chain for extracting structured information from an arxiv link. In particular, we will ask langchain to extract the title, author information, and a summary of the paper content. After that, we use a web search tool to find the recent papers written by the first author."),(0,r.kt)("p",null,"To summarize, our sequential chain contains the following steps:"),(0,r.kt)("ol",null,(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("strong",{parentName:"li"},"Transform Chain"),": Extract Paper Content from arxiv Link ",(0,r.kt)("strong",{parentName:"li"},"=>")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("strong",{parentName:"li"},"LLMChain"),": Summarize the Paper, extract paper title and authors ",(0,r.kt)("strong",{parentName:"li"},"=>")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("strong",{parentName:"li"},"Transform Chain"),": to generate the prompt ",(0,r.kt)("strong",{parentName:"li"},"=>")),(0,r.kt)("li",{parentName:"ol"},(0,r.kt)("strong",{parentName:"li"},"Agent with Web Search Tool"),": Use Web Search to find the recent papers by the first author")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'def paper_content_extraction(inputs: dict) -> dict:\n arxiv_link = inputs["arxiv_link"]\n loader = OnlinePDFLoader(arxiv_link)\n pages = loader.load_and_split()\n return {"paper_content": pages[0].page_content + pages[1].page_content}\n\n\ndef prompt_generation(inputs: dict) -> dict:\n output = inputs["Output"]\n prompt = (\n "find the paper title, author, summary in the paper description below, output them. After that, Use websearch to find out 3 recent papers of the first author in the author section below (first author is the first name separated by comma) and list the paper titles in bullet points: \\n"\n + output\n + "."\n )\n return {"prompt": prompt}\n\n\npaper_content_extraction_chain = TransformChain(\n input_variables=["arxiv_link"],\n output_variables=["paper_content"],\n transform=paper_content_extraction,\n verbose=False,\n)\n\npaper_summarizer_template = """You are a paper summarizer, given the paper content, it is your job to summarize the paper into a short summary, and extract authors and paper title from the paper content.\nHere is the paper content:\n{paper_content}\nOutput:\npaper title, authors and summary.\n"""\nprompt = PromptTemplate(\n input_variables=["paper_content"], template=paper_summarizer_template\n)\nsummarize_chain = LLMChain(llm=llm, prompt=prompt, verbose=False)\n\nprompt_generation_chain = TransformChain(\n input_variables=["Output"],\n output_variables=["prompt"],\n transform=prompt_generation,\n verbose=False,\n)\n\nbing = BingSearchAPIWrapper(k=3)\ntools = [BingSearchRun(api_wrapper=bing)]\nweb_search_agent = initialize_agent(\n tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=False\n)\n\nsequential_chain = SimpleSequentialChain(\n chains=[\n paper_content_extraction_chain,\n summarize_chain,\n prompt_generation_chain,\n web_search_agent,\n ]\n)\n')),(0,r.kt)("h3",{id:"apply-the-langchain-transformer-to-perform-this-workload-at-scale"},"Apply the LangChain transformer to perform this workload at scale"),(0,r.kt)("p",null,"We can now use our chain at scale using the ",(0,r.kt)("inlineCode",{parentName:"p"},"LangchainTransformer")),(0,r.kt)("pre",null,(0,r.kt)("code",{parentName:"pre",className:"language-python"},'paper_df = spark.createDataFrame(\n [\n (0, "https://arxiv.org/pdf/2107.13586.pdf"),\n (1, "https://arxiv.org/pdf/2101.00190.pdf"),\n (2, "https://arxiv.org/pdf/2103.10385.pdf"),\n (3, "https://arxiv.org/pdf/2110.07602.pdf"),\n ],\n ["label", "arxiv_link"],\n)\n\n# construct langchain transformer using the paper summarizer chain define above\npaper_info_extractor = (\n LangchainTransformer()\n .setInputCol("arxiv_link")\n .setOutputCol("paper_info")\n .setChain(sequential_chain)\n .setSubscriptionKey(openai_api_key)\n .setUrl(openai_api_base)\n)\n\n\n# extract paper information from arxiv links, the paper information needs to include:\n# paper title, paper authors, brief paper summary, and recent papers published by the first author\ndisplay(paper_info_extractor.transform(paper_df))\n')))}m.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/assets/js/runtime~main.75c4a7e4.js b/assets/js/runtime~main.dd3a0c4a.js similarity index 98% rename from assets/js/runtime~main.75c4a7e4.js rename to assets/js/runtime~main.dd3a0c4a.js index d0cc8d8455..c4b422419e 100644 --- a/assets/js/runtime~main.75c4a7e4.js +++ b/assets/js/runtime~main.dd3a0c4a.js @@ -1 +1 @@ -(()=>{"use strict";var e,b,f,d,a,c={},t={};function r(e){var b=t[e];if(void 0!==b)return b.exports;var f=t[e]={exports:{}};return c[e].call(f.exports,f,f.exports,r),f.exports}r.m=c,e=[],r.O=(b,f,d,a)=>{if(!f){var c=1/0;for(i=0;i=a)&&Object.keys(r.O).every((e=>r.O[e](f[o])))?f.splice(o--,1):(t=!1,a0&&e[i-1][2]>a;i--)e[i]=e[i-1];e[i]=[f,d,a]},r.n=e=>{var b=e&&e.__esModule?()=>e.default:()=>e;return r.d(b,{a:b}),b},f=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,r.t=function(e,d){if(1&d&&(e=this(e)),8&d)return e;if("object"==typeof e&&e){if(4&d&&e.__esModule)return e;if(16&d&&"function"==typeof e.then)return e}var a=Object.create(null);r.r(a);var c={};b=b||[null,f({}),f([]),f(f)];for(var t=2&d&&e;"object"==typeof t&&!~b.indexOf(t);t=f(t))Object.getOwnPropertyNames(t).forEach((b=>c[b]=()=>e[b]));return c.default=()=>e,r.d(a,c),a},r.d=(e,b)=>{for(var f in b)r.o(b,f)&&!r.o(e,f)&&Object.defineProperty(e,f,{enumerable:!0,get:b[f]})},r.f={},r.e=e=>Promise.all(Object.keys(r.f).reduce(((b,f)=>(r.f[f](e,b),b)),[])),r.u=e=>"assets/js/"+({226:"32fe2e34",271:"e8d66fc1",360:"88b63415",390:"001aea4c",403:"7b60d8e3",429:"c4d68582",587:"b83021be",688:"b66ab6b5",787:"20641120",806:"b3272230",847:"73c2022c",939:"0a65b9de",1084:"421d486c",1123:"1f6f5ac9",1172:"255635fc",1283:"f264933a",1286:"fd4bd09d",1773:"3cbfbf75",1802:"513002de",1854:"ad10988d",1985:"368de161",2070:"6c82e982",2195:"b72abe57",2381:"59e4d88e",2408:"09bc1516",2797:"e4e95844",2901:"980b00ea",3202:"6c61f93b",3554:"55203621",3611:"6e159789",3701:"513214d5",3765:"d1428557",3814:"b41f295c",3871:"445012ce",3953:"60a2189a",4026:"73cf52e8",4193:"1ad91055",4204:"ea2d2bcc",4503:"1e0d2d3b",4800:"c065fcb9",5e3:"cf57716c",5253:"405fabd0",5254:"33b3776b",5287:"b8963dc0",5378:"fc338b02",5789:"1155dc6f",5909:"78cb5959",6004:"10b7acf6",6289:"75801678",6304:"5e008bd1",6413:"4bbbdfcf",6633:"707d2a35",6712:"ed40d5f1",7201:"567cd2fd",7234:"268b7b40",7509:"9530a2cf",7925:"9c5088a2",8006:"9da9112f",8040:"e24ee294",8146:"d13e532e",8168:"e7bd83ca",8749:"2d527871",8839:"a563fa93",8966:"41f327fc",9296:"1518f45a",9540:"4f1ca6a9",10643:"014ce9a3",10912:"e12b4691",10978:"ac581902",11298:"c76e8cac",11542:"fba72e87",11561:"2ece0fbe",11977:"3b5de274",12124:"d05981ec",12398:"6c7ccb6d",12630:"5962ef6e",12824:"75514f6a",12840:"4bea8531",13431:"1b3ab3b8",13482:"ef6ec597",14273:"1b7af47d",14341:"a5ffe972",14535:"1842239f",15095:"90e4432b",15149:"603706f6",15165:"4f81391a",15253:"b7787e72",15343:"1abf1e18",15404:"ac39f7dc",15487:"3a4f9d93",15511:"a9330763",15566:"ebf648dd",15579:"dd29718b",15788:"f3e986ee",15800:"18c7d2a3",15839:"07f6b90a",16030:"0f113696",16269:"76d23901",16286:"bef0d2d3",16551:"3c1f4383",16833:"5b779334",16975:"51dbb02f",17026:"3ea19ec0",17295:"6eddee4c",17538:"ad139e3c",17542:"66d63bfc",17655:"b1706689",17782:"b328a361",17817:"1766722a",17820:"dd3f9ada",17973:"63cd8b22",18373:"445a1b28",18443:"ceeda143",18563:"d84904f9",18825:"c73cfc52",18828:"514f485e",18933:"11c317cb",19042:"c14fdd92",19336:"8bff2444",19465:"548964ce",19483:"15ea8379",19603:"2b30bc14",19775:"b2be3901",19950:"275c0efd",21102:"20e0fe38",21401:"6e4e5cd9",21933:"cb7c2a83",22007:"a34c49d0",22011:"e316de39",22287:"5407376f",22341:"aa5548ab",22577:"8af72580",22707:"49e6864b",22712:"6b40ec54",22970:"88f8e3e1",23036:"052105dd",23234:"c0391845",23544:"4e3910af",23807:"f21c13c4",23838:"92ce4fd5",23894:"35826f14",23941:"2bcda99e",23979:"48329b94",24046:"7aeb6f16",24422:"c5c95e45",24484:"1a3608a9",24594:"b962e007",24627:"e579bad6",25116:"10e13725",25449:"cf666d49",25844:"cee4c3b9",25990:"38d6824e",26021:"295a8e78",26184:"0eaf8468",26676:"03d0d544",26685:"c2e57a1b",26871:"8a1bca9d",27497:"ce40236b",27572:"a07d09c1",27813:"35bdc661",27918:"17896441",27991:"dc1e40d7",28456:"8d990105",28465:"6e2678b7",28559:"dac82c00",28634:"f1b2dc7f",28696:"b066233f",28746:"d620a661",28955:"6e71bda3",28957:"2d5b0e6a",29006:"9783ff24",29283:"7c007ec6",29514:"1be78505",29535:"905ce8b5",29540:"c82134d8",29696:"166d99f5",29806:"b9f125cc",29845:"39a50b22",30013:"5eddefdd",30023:"b739c60a",30301:"627aefca",30335:"22444eb9",30460:"0a9c6347",30464:"47bbce76",30927:"a6191053",31153:"c95882d3",31279:"fd3b6ea5",31333:"521f9727",31836:"645abc7d",31896:"9fa7e8eb",32185:"561bd03d",32599:"5f17911b",32777:"8ff6772d",32789:"0606f1a5",32855:"5bf420e9",33064:"da508cef",33195:"6a26e359",33358:"5c18deb5",33631:"90f00051",33988:"01587ade",34334:"141dc4a8",34397:"3e1e84da",34654:"e887b273",34797:"0d432c84",34940:"7275f11a",35015:"2c5f0abd",35024:"0daa5b3f",35040:"767a7177",35056:"c3b1d949",35124:"061b38c5",35328:"54287b47",35517:"22f921cc",35537:"c3dac62d",35750:"0637d1e7",35939:"0453ae68",36005:"90742975",36204:"50af03e4",36224:"6c7d288d",36285:"b0e28723",36572:"589adaf2",36585:"b7226801",36673:"affaa541",36698:"aba932dd",36746:"5ef28062",36773:"b4ae214b",36828:"df01388f",36916:"f027bb15",36958:"1e57c92a",37001:"83586bff",37069:"40b49758",37334:"27de0303",37392:"ef77ef39",37474:"21f47424",37486:"a406e231",37592:"0713a501",37610:"077ca2ec",37638:"d57515f7",37676:"4631a032",37742:"5efd5006",38057:"d9e5e528",38078:"6f8cd013",38261:"012c1c0f",38377:"70b306ba",38399:"327be84b",38759:"16d05917",38781:"5e7c3303",38846:"bf04857e",38859:"b7802ae1",38962:"4b18e5ae",39139:"ae89cd3b",39242:"bb7a48da",39307:"0d6d64cd",39402:"16d8ca91",39596:"4e105cba",40023:"2e93ae0f",40367:"fece3c24",40412:"9e5b788f",40659:"610d430a",40852:"965ed185",40950:"1db1f785",41607:"04804d02",41719:"b66e2100",41867:"391cb159",41944:"79114dfe",41987:"2cec6bcf",42037:"328f3af4",42283:"e3089280",42377:"1789daeb",42428:"31ee0fa3",42522:"6c4c8509",42638:"e21a3367",42802:"3ee83f58",42815:"993c5022",42852:"423540f2",42947:"42736d5f",42970:"de3bb7c3",43156:"1587ba1d",43251:"455aa1bd",43344:"995576e9",43426:"494501bd",43488:"336404dc",43730:"034e6169",44177:"d41234a3",44178:"70fe7dd7",44334:"00ab2cda",44422:"cd8e97a5",44490:"d81d1137",44504:"99e5ffa1",44534:"2d8a44d7",44539:"fe6131e8",45282:"104b9b56",45370:"7649ec47",45503:"bc85b804",45589:"7ad384dc",45938:"8799ef55",46103:"ccc49370",46135:"96cf5ff0",46338:"0fb98d33",46559:"98258c5e",46613:"b4898d44",46807:"c39e3602",46845:"79d79762",47454:"c5775233",47552:"01eb996b",48007:"408524d4",48491:"59ef8022",48602:"25fb872c",48807:"13246fd5",49101:"eed630e0",49196:"efbc69e1",49778:"3e7c5569",49814:"3a7a88af",50002:"fc598b5d",50040:"a81cf8b0",50196:"9c279ae7",50297:"fe2c893b",50653:"97592aac",50745:"069e3c4c",50783:"5c0b440a",51078:"ff428354",51105:"6b9bdd6f",51317:"6a9ad78d",51478:"b4a95996",51732:"e9aab890",51842:"d8d6ba90",51910:"644ff257",51918:"9688a5be",52406:"c4d09a44",52433:"345903d4",52447:"2e5f4263",52535:"814f3328",52730:"a3fd5a6d",52752:"9b020c4a",52787:"ed5c6c48",52804:"12b89d4a",53017:"bf002efe",53050:"943afd32",53090:"86ed1ff2",53338:"b88f1f9b",53572:"a8456d9f",53608:"9e4087bc",54528:"cb5f1a06",54890:"f537d1e9",55276:"76b137bf",55305:"ca0a1bad",55538:"bece3771",55702:"41ee0cff",55864:"ce7af6ae",56373:"cff2e56e",57074:"1293b2b0",57168:"d9837698",57231:"9d095dba",57258:"e2689dfc",57689:"39122aab",57887:"be95fd8f",57924:"15a59c33",57930:"d39aa6d3",58092:"4d31bfb3",58462:"d989c707",58524:"0349e4f0",58738:"b940e0b5",58755:"990f07de",58846:"3bed3e2f",58861:"7c2e1100",59076:"dfb6dfe9",59394:"4801d370",59484:"71042800",59823:"76bf568a",60055:"403e642d",60445:"f322d2da",60880:"f0649d54",61407:"3ef20ed6",61428:"b5375b6f",61442:"b75118f0",61755:"04fc65cc",61778:"a0d8a1a2",61829:"f38a0cbd",62079:"5cfa133c",62106:"2c98ca3e",62217:"04ee4ab4",62465:"ddacd194",62565:"c38c658c",62675:"4ef2f7cf",62721:"d95a70ea",62750:"52880d18",62773:"08019edc",63002:"bf8a5c8a",63099:"95d9e891",63196:"74091a62",63402:"56543a15",63448:"ed6d544d",64172:"3bbe0f34",64195:"c4f5d8e4",64233:"b7c6295c",64482:"0e3f998d",64713:"d355047f",64927:"fa2e2688",64992:"37ab9beb",65437:"44ff0d07",65752:"c3c516ff",65942:"900b935c",66167:"74659d33",66419:"e5fe80e3",66555:"0fa36e32",66808:"8462374b",66945:"296054a7",66981:"e863531f",67218:"ba9c0924",67287:"99f8fee5",67394:"31bb7274",67448:"888e2229",67597:"a608660b",67664:"faca745a",67706:"db170a23",67870:"ed316aed",68070:"a1824f78",68283:"e84755e1",68314:"104bf0a6",68526:"3201258e",68549:"c991f47b",68585:"18c9c2ba",68809:"fd748117",69159:"57e687e8",69226:"8181e18a",69314:"232d09b0",69559:"3fb29942",69966:"d1246f49",70224:"77c259dd",70344:"7aa7b8e0",70406:"6a07b55e",70880:"2461af4d",71535:"1a215693",71580:"179d57fa",72141:"a877f9e5",72249:"38b375cc",72286:"3d963cd5",72293:"56087ddb",72326:"1ef65056",72912:"13f17286",73060:"75406112",73249:"5225b7e0",73419:"8e32e44d",73443:"91e9cb67",73602:"af1e70b6",73796:"6a95f87e",74275:"792eae7e",74772:"4997ef4f",74963:"6f479459",75088:"7a1d9beb",75488:"cbc3190d",75586:"e7250a93",75658:"c23b5e26",76258:"3dafa002",76300:"d252041b",76454:"34f00221",76515:"9cb6fa7c",76572:"58486a98",76843:"62983deb",77225:"51878b77",77390:"b35e9998",77491:"d3093636",77682:"50f80512",77697:"39582d99",77969:"5887dc62",78047:"3b75a1c6",78480:"26631026",78579:"0b416bde",78817:"88e44e99",79043:"00353f68",79079:"efc1978d",79088:"0a954229",79328:"a0434473",79362:"34ff21b6",80053:"935f2afb",80112:"8fd0c721",80322:"b576c645",80598:"e1269457",80707:"ab9a8589",80751:"556ad472",80829:"ee6cea3b",80837:"17f418b7",80874:"b92ffdc6",80982:"2fb6c1b0",81102:"d25cde70",81340:"e1b2b5f3",81663:"00056db7",81787:"e4ba4487",81878:"fa889bee",81913:"1d820955",81970:"01a15f20",82158:"f7580424",82412:"12d85ab5",82569:"e03f5a54",82643:"f228fdc5",82668:"3a1147b5",82681:"62ccfbde",82723:"23af8e3e",82744:"f8597cb0",82880:"e1ed1e14",82994:"48cf7354",83251:"c34b1e36",83378:"fbb011bb",83393:"3e9a7422",83669:"d707d2ed",83781:"866b91a1",83790:"604b2d50",84202:"c2492d56",84207:"f14633e7",84298:"6094981e",84880:"f8aab3ca",84908:"59d9fecc",84954:"f7a32432",84979:"e8b9c2c7",85043:"8eb438b4",85122:"c2e40c34",85332:"321ba9c5",85433:"ffb7f88a",85648:"ee83122d",85729:"56d039a1",85847:"4e044b4e",86184:"4130db01",86248:"7bf16cc6",86482:"3c22aa66",86704:"67e06a8b",86856:"b80b493a",87320:"c40984d1",87384:"b35d004b",87452:"93ba15e0",87597:"66fcb786",87684:"b38eec89",87808:"b28fe732",88003:"5066efb2",88838:"70e8008e",88984:"d535b358",89220:"a2b5e5df",89262:"120352d6",89407:"deef0af8",89450:"83203dd1",89461:"6ff7775f",89463:"a19ce767",89767:"c413b43a",89864:"1dc2f362",90288:"69bb4b11",90544:"4585b70e",91078:"80472ecc",91103:"3ac37a06",91210:"e2ce57ed",91214:"cc52e3cd",91387:"d36f3429",91391:"2137a7cd",91570:"4de42975",91747:"85aed2f9",91755:"bc4bf151",91824:"c90c942b",91871:"8590feaa",91895:"78aebd5d",91921:"8328d740",92018:"dc7f7ac1",92045:"0e0ee9b2",92059:"b33598ac",92321:"325e22bd",92805:"d175df5e",92836:"8a46fce0",92867:"5dbf4619",92982:"3647ac9b",93089:"a6aa9e1f",93377:"318ccb43",94330:"d8b3ac6a",94353:"1f31a275",94395:"0e2bf69e",94508:"e464d112",94548:"53a5cb1e",94553:"d7f58694",94579:"94d743d6",94597:"d0cb74df",94603:"f7bfff73",94623:"e402e26d",94702:"5a3e1bca",94744:"1014102e",94756:"4cee39d7",94916:"82878c84",94994:"109f6864",95049:"f85db098",95064:"e9b75d67",95521:"283b8f99",95584:"bd1bcb2c",95602:"57ef17c5",95717:"d0c5e4fa",95768:"a34b09d4",95974:"8cb11643",96350:"1e14124f",96673:"e5c4ef01",96816:"82d7cc79",96960:"1a62d1af",97030:"c764f2c0",97065:"fe5d19cb",97165:"294c060e",97297:"e0077969",97381:"4db82253",97715:"2a023663",97721:"0e16e9ea",97800:"09990b79",97920:"1a4e3797",97988:"4e432063",98050:"d3c9b0d7",98197:"eb0ef952",98462:"bc6a9944",98528:"7296bed0",98546:"f5b4e517",98583:"6bdbf6b8",99119:"2406587d",99128:"15516451",99147:"7aa49360",99161:"69dc6d0a",99374:"58b8f176",99378:"efbb59fb",99601:"fc47fc53",99781:"dea122e2",99970:"093b81f4"}[e]||e)+"."+{226:"7c60e348",271:"b472ae37",360:"0c049a9a",390:"16907556",403:"4c6e2b65",429:"c530f0a0",587:"25e7c11f",688:"feefe5ad",787:"4d89b7a8",806:"3689025c",847:"c07433f0",939:"dd4545ed",1084:"0194791b",1123:"640e13d4",1172:"105f1ab1",1283:"7a0aa43b",1286:"6ad5a370",1773:"3a54b67d",1802:"613317d6",1854:"c62a83e1",1985:"a23ad2e8",2070:"1b8bc6df",2195:"a42749a0",2381:"a9df8fd6",2408:"f68bf61a",2797:"ec2ce8dc",2901:"f0462d81",3202:"83225189",3554:"c9c5c25c",3611:"84cee5cb",3701:"ebe9d132",3765:"c7b94ddd",3814:"a51776da",3871:"3d351853",3953:"aa3ad659",4026:"91ffb89c",4193:"0c39c6fd",4204:"2942d62d",4503:"7ec496a3",4800:"f4fea7cb",5e3:"c4a07252",5253:"58f5ca42",5254:"d9a92894",5287:"b75edc26",5378:"be4cc3ff",5789:"294730b0",5909:"2b553671",6004:"fbfda01a",6289:"7e9ad05f",6304:"e1bb2c7e",6413:"406cd688",6633:"eeed6b75",6712:"11e439bd",7201:"97d9fe39",7234:"61faaca9",7509:"e0320a5b",7925:"45be218c",8006:"696da333",8040:"24249473",8146:"e8787811",8168:"805ffacc",8749:"fd6620c4",8839:"ef3c8cef",8966:"d111deb6",9296:"415afda5",9540:"fd3585d8",10643:"9ac2966d",10912:"5766341c",10978:"fa5141d9",11298:"a8bed199",11542:"4e543c37",11561:"4f426cb6",11977:"4f629e6e",12124:"92c4c2bb",12398:"ab17108f",12630:"89d0fd21",12824:"deab964d",12840:"15a46ad4",13431:"5b3464d7",13482:"f117cd47",14273:"a15ef647",14341:"5f31f2f7",14535:"e5b29187",15095:"1ee08781",15149:"b1d50f20",15165:"9fc5aa61",15253:"eb95bb49",15343:"b4f9c1d7",15404:"d4e54d08",15487:"e356abfe",15511:"8de44ef4",15566:"7af2a42d",15579:"80d18a93",15788:"aaa06e2d",15800:"0daa1137",15839:"f15dc558",16030:"a1d2c595",16269:"2a5ae6a5",16286:"3091a1cb",16551:"8334c80a",16833:"ceeed8df",16975:"25f84d8d",17026:"40a33067",17295:"e7002340",17538:"68b2fa1b",17542:"b345b319",17655:"caf41e88",17782:"9af38353",17817:"7a60488d",17820:"e028f5db",17973:"49a55e91",18373:"3b52ab84",18443:"8ed83508",18563:"a650d3a8",18825:"edd2110a",18828:"61dff53a",18894:"8ba79e33",18933:"82ff982d",19042:"f0e3a658",19336:"7f5c5c75",19465:"ba302a23",19483:"a04abc04",19603:"63140d8c",19775:"53d720b7",19950:"8e6a0524",21102:"e64026a7",21401:"25b8730b",21933:"284bd8a5",22007:"3c7d5195",22011:"12fd87b1",22287:"c1ff4071",22341:"a7c000a6",22577:"12ecb1e2",22707:"969377ed",22712:"204c8f47",22970:"02f3aa52",23036:"ba928913",23234:"8a1f3fd4",23544:"a2c9c406",23807:"f8ab332b",23838:"f96fd2ce",23894:"4a36373d",23941:"7eb4157a",23979:"da7d92d4",24046:"c95f1e59",24422:"14588453",24484:"290578f8",24572:"7376a9ae",24594:"aa995eb4",24627:"ad35e092",25116:"cadea3bd",25449:"28595d1a",25844:"67b2a5fc",25990:"d1dad3f6",26021:"6b9ce1ea",26184:"d1fb0dee",26676:"e6e7e7a4",26685:"04f76c9c",26871:"25fabaa5",27497:"8be44fac",27572:"92e50813",27813:"7670b946",27918:"1dd7c1ea",27991:"5af43466",28456:"42e1f8bb",28465:"bae03a70",28559:"53d784db",28634:"07a76afe",28696:"03d2fb68",28746:"258f03a6",28955:"3d9432bb",28957:"5c5fc2a0",29006:"bcf81dc8",29283:"a12f8681",29514:"216cf5d7",29535:"75ea1aee",29540:"3ae661ae",29696:"09a48be2",29806:"e13efecc",29845:"6229c1ae",30013:"9156458e",30023:"f70eac19",30301:"c4bbb92d",30335:"ebddc206",30460:"c79f944d",30464:"cfaedc94",30927:"5f7ddb44",31153:"c74b2845",31279:"771e833b",31333:"714da6d3",31836:"6afbe490",31896:"cce4e3d2",32185:"242c68ad",32599:"6ee262cb",32777:"9a4fbe82",32789:"d5b89e35",32855:"ab34218c",33064:"2b15599a",33195:"23458ac7",33358:"6875ed7c",33631:"a5a75b4b",33988:"0caae640",34334:"3a1990ef",34397:"50e63b4b",34654:"96dfebcb",34797:"cf49ef0b",34940:"7bb97a11",35015:"90bcb885",35024:"77f9b663",35040:"ba559dd9",35056:"6d5d38eb",35124:"6cc60e98",35328:"2329ba8e",35517:"5666354f",35537:"27d90dc8",35750:"064379cf",35939:"4fca9411",36005:"07beb68e",36204:"1b5a72e3",36224:"169580d7",36285:"770abe5c",36572:"3b4a629e",36585:"1717b0cf",36673:"6015f67e",36698:"24aeb102",36746:"b6d58204",36773:"a3eb6850",36828:"8ab9b559",36916:"627e234a",36958:"2b54b1f9",37001:"d663eac3",37069:"0133d096",37334:"bd502a3e",37392:"3674eb36",37474:"8fc13a5a",37486:"be80e33d",37592:"84e46938",37610:"5ca2e1ca",37638:"6fbb6657",37676:"98d2551a",37742:"a5e70451",37853:"6b0071e3",38057:"0aa70e90",38078:"eaa3b4f7",38261:"3e7aa5d5",38377:"bd09de84",38399:"4a04eae7",38759:"873ccf3d",38781:"2f3351a0",38846:"88bd3c18",38859:"7ccc674c",38962:"0236cee6",39139:"6f56c354",39242:"40e4acd1",39307:"5d7232c1",39402:"d6599b0a",39596:"511860eb",40023:"caff10d8",40367:"8325e1db",40412:"8ecdd605",40659:"c8eb504a",40852:"b29206c4",40950:"b8c4e793",41607:"38ca02bf",41719:"53c08f2d",41867:"7db460cb",41944:"4ec7bb98",41987:"0609e7ff",42037:"ad3a366f",42283:"a34f8074",42377:"4d69dc8d",42428:"7e0359d6",42522:"096248d0",42638:"082230ce",42802:"91ab538d",42815:"971a7a1a",42852:"bd015a5e",42947:"173cf0a5",42970:"0ca4866d",43156:"6e858d37",43251:"002f23aa",43344:"a5f5a399",43426:"97ac4633",43488:"3bcd1836",43730:"a22706a2",44177:"a7c47f47",44178:"6168e9e1",44334:"7f2d19ca",44422:"e1c7affe",44490:"901cceab",44504:"b8b106d8",44534:"7f1b5fc9",44539:"558a2d88",45282:"1eae2722",45370:"0d930dbf",45503:"a72ce2a2",45589:"7f42a2b5",45938:"ee06af10",46048:"f55105b0",46103:"4043289d",46135:"b492f885",46338:"073f7a96",46559:"d0e09414",46613:"0384012a",46807:"62a2c61f",46845:"4deeece5",46945:"fd128107",47454:"2e3cc69a",47552:"96f02664",48007:"1f47865f",48491:"2a20a978",48602:"f679dc4f",48807:"d5a3f483",49101:"a98291dd",49196:"867a0f29",49778:"cd04be09",49814:"cbb3c3ac",50002:"0471b2c6",50040:"e195e324",50196:"9f0c9a57",50297:"4da5326f",50653:"288194f1",50745:"fc78265d",50783:"3c16a52b",51078:"23039dab",51105:"9273b6ea",51317:"ff17fe39",51478:"66075e47",51732:"ea31977e",51842:"a88f573c",51910:"eff31342",51918:"0614f4d5",52406:"4d053caf",52433:"30590104",52447:"d06ddc63",52535:"b03595a3",52730:"7a6b4a14",52752:"355782e7",52787:"838e0402",52804:"eefbdfaa",53017:"9348a25f",53050:"6abd1a54",53090:"75338eaf",53140:"a29368cb",53338:"083cbf6d",53572:"7effd19a",53608:"72da96df",54528:"ff5b94ed",54890:"af1bc49c",55276:"facd650b",55305:"1e659f3b",55538:"81957284",55702:"24761a8e",55864:"16c1759c",56373:"f0ab3974",57074:"b14d0343",57168:"9a260310",57231:"f08ac08d",57258:"87fca17c",57689:"0759cd84",57887:"3c00aa5f",57924:"929e4f47",57930:"1e91e4cb",58092:"80d71d4e",58462:"ef15d1f1",58524:"b857b4f3",58738:"43f88bbc",58755:"70e7bcad",58846:"b5894e92",58861:"99c57d57",59076:"823e2f17",59394:"989a950f",59484:"edba3628",59823:"c4a7067d",60055:"fb65aa03",60445:"deb5d152",60880:"4828013e",61407:"8dcb13c6",61428:"d1a4d228",61442:"6214188f",61755:"85679afc",61778:"42efbba1",61829:"79c4438f",62079:"3d50e52d",62106:"b11711e1",62217:"f597b4a5",62465:"26755e65",62565:"7331c10f",62675:"0c1bfb62",62721:"a55c8bbb",62750:"33813778",62773:"35944844",63002:"cce5fea8",63099:"11b155f6",63196:"3a5e22f4",63402:"9a4cd328",63448:"cdc541ce",64172:"85cd1357",64195:"13fd239b",64233:"871b4ef9",64482:"d6d86e59",64713:"ce848f45",64927:"0b0a4e1c",64992:"2852555a",65437:"08ba8e6c",65752:"d9a972a6",65942:"3ac80a08",66167:"22cd1bfb",66419:"e6a0b17c",66555:"4058ffc7",66808:"061aa888",66945:"1e9742d3",66981:"42ac147f",67218:"4d5c911a",67287:"30019ebd",67394:"0c477410",67448:"ea219a5e",67597:"1f3a4c28",67664:"1f13f78c",67706:"b758af82",67870:"668faecc",68070:"5466f725",68283:"90b5bff2",68314:"5990284f",68526:"93b7d091",68549:"c424522e",68585:"01ca353e",68809:"34affa88",69159:"e2f680f0",69226:"ec923500",69314:"dc5f5244",69559:"fc9487d1",69966:"008e5e0b",70224:"02e885c1",70344:"6ee9dc6c",70406:"5572fd3b",70880:"660ff070",71535:"d1cced29",71580:"60736693",72141:"3c8ed086",72249:"833d6d75",72286:"97cc98df",72293:"5635000a",72326:"24f71dd5",72912:"622d8a7f",73060:"a4a4b44f",73249:"6b863ffe",73419:"78729bc5",73443:"f064c580",73602:"68c9c4d9",73796:"52f85e79",74275:"04909339",74772:"fe2304b2",74963:"3f156112",75088:"4d3332b5",75488:"80bb8867",75586:"ced09747",75658:"39b67445",76258:"e4fa02c3",76300:"de74179d",76454:"392d130c",76515:"09d48160",76572:"844b09b1",76780:"9fcdbad9",76843:"1987905e",77225:"d02aa824",77390:"a83939c0",77491:"28d99640",77682:"dd07edcb",77697:"8ab49adf",77969:"f101a58f",78047:"17c85fed",78480:"6094a26e",78579:"29367017",78817:"861d7487",79043:"9386a8fc",79079:"550ded59",79088:"4dea5ba6",79328:"01e88565",79362:"0185cc0f",80053:"4c83d661",80112:"fd699d0f",80322:"c2a5fb58",80598:"9604dd74",80707:"693fd0ce",80751:"17b0c135",80829:"a2ebc88c",80837:"842e3aa3",80874:"bf4d4db4",80982:"3a6c096a",81102:"ddb287b3",81340:"c70c4ed4",81663:"16985740",81787:"e94876f2",81878:"5025bbc2",81913:"45c6b533",81970:"f817585c",82158:"34c7f206",82412:"06ef2967",82569:"b8197956",82643:"aab4adeb",82668:"86174e6f",82681:"431ff015",82723:"2533d7d1",82744:"251a5009",82880:"e515703a",82994:"db964a35",83251:"5b905490",83378:"30d16d45",83393:"b4209ab4",83669:"d3240ea5",83781:"40717c05",83790:"eaac0795",84202:"3d5cb62e",84207:"75958044",84298:"a9f205e4",84880:"ee402f90",84908:"2baaa7ee",84954:"a71c1740",84979:"2a25ce30",85043:"f98b3ebd",85122:"e3efa00f",85332:"4a6b1343",85433:"81df0347",85648:"e46388c0",85729:"0a998fbe",85847:"0064d3f3",86184:"ce1a31c9",86248:"41094952",86482:"79b92687",86704:"4f81ddb2",86856:"985584fd",87320:"e9b88331",87384:"6d8d8c86",87452:"b3e364f6",87597:"41108fd6",87684:"e7b3125d",87780:"f43ae5ba",87808:"37b0acfa",88003:"4b4a7a4c",88838:"1a748968",88984:"3ff08287",89220:"e11e9cc5",89262:"7b9ebd97",89407:"e589fd66",89450:"f36475fd",89461:"2a186faa",89463:"fcc8cf1f",89767:"5f64aaac",89864:"da57dee1",90288:"f854e6db",90544:"ebad36c0",91078:"8a9dd085",91103:"634f6370",91210:"f250c9ac",91214:"9600caec",91387:"50b3c0bf",91391:"6e8550b2",91570:"c5bc4d89",91747:"1df1ce06",91755:"efaa05fe",91824:"d2381236",91871:"264fce60",91895:"7233fcae",91921:"0f73e2cb",92018:"18420d4b",92045:"b7082472",92059:"3cb43b94",92321:"a13df7c4",92805:"bc260ba1",92836:"e6ce93fd",92867:"86a6b9e8",92982:"902b0046",93089:"5241fd92",93377:"aa0a6603",94330:"c6afff6b",94353:"a5690af9",94395:"8bb62ead",94508:"a7b3b267",94548:"7223c440",94553:"49648640",94579:"17f5fc3c",94597:"985993f6",94603:"107f7123",94623:"552eed53",94702:"239c52b1",94744:"1273506a",94756:"e8ca3715",94916:"ba81f68e",94994:"e5301c26",95049:"1d1e73a8",95064:"a0f894d7",95521:"64a26515",95584:"9c262440",95602:"279c2c7b",95717:"2a4b9c2d",95768:"92ce7313",95974:"53f9c073",96350:"146afc64",96673:"1c5fb002",96816:"d0dd057b",96960:"05a860df",97030:"47d88247",97065:"6050493a",97165:"c0ed39cb",97297:"2ae26e66",97381:"3bd2df9d",97715:"61cec5c2",97721:"489ca716",97800:"28867a86",97920:"b813a053",97988:"956d5ccf",98050:"0cdbfd5b",98197:"69b26a94",98462:"9f99c59e",98528:"f7d9cf13",98546:"238da816",98583:"1cb4799b",99119:"8e792955",99128:"abb2247b",99147:"2e0f9fd2",99161:"3e7e608f",99374:"4be47182",99378:"e5729889",99601:"3a075b02",99781:"0dcdc1ac",99970:"5f073138"}[e]+".js",r.miniCssF=e=>{},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,b)=>Object.prototype.hasOwnProperty.call(e,b),d={},a="synapseml:",r.l=(e,b,f,c)=>{if(d[e])d[e].push(b);else{var t,o;if(void 0!==f)for(var n=document.getElementsByTagName("script"),i=0;i{t.onerror=t.onload=null,clearTimeout(s);var a=d[e];if(delete d[e],t.parentNode&&t.parentNode.removeChild(t),a&&a.forEach((e=>e(f))),b)return b(f)},s=setTimeout(l.bind(null,void 0,{type:"timeout",target:t}),12e4);t.onerror=l.bind(null,t.onerror),t.onload=l.bind(null,t.onload),o&&document.head.appendChild(t)}},r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.p="/SynapseML/",r.gca=function(e){return e={15516451:"99128",17896441:"27918",20641120:"787",26631026:"78480",55203621:"3554",71042800:"59484",75406112:"73060",75801678:"6289",90742975:"36005","32fe2e34":"226",e8d66fc1:"271","88b63415":"360","001aea4c":"390","7b60d8e3":"403",c4d68582:"429",b83021be:"587",b66ab6b5:"688",b3272230:"806","73c2022c":"847","0a65b9de":"939","421d486c":"1084","1f6f5ac9":"1123","255635fc":"1172",f264933a:"1283",fd4bd09d:"1286","3cbfbf75":"1773","513002de":"1802",ad10988d:"1854","368de161":"1985","6c82e982":"2070",b72abe57:"2195","59e4d88e":"2381","09bc1516":"2408",e4e95844:"2797","980b00ea":"2901","6c61f93b":"3202","6e159789":"3611","513214d5":"3701",d1428557:"3765",b41f295c:"3814","445012ce":"3871","60a2189a":"3953","73cf52e8":"4026","1ad91055":"4193",ea2d2bcc:"4204","1e0d2d3b":"4503",c065fcb9:"4800",cf57716c:"5000","405fabd0":"5253","33b3776b":"5254",b8963dc0:"5287",fc338b02:"5378","1155dc6f":"5789","78cb5959":"5909","10b7acf6":"6004","5e008bd1":"6304","4bbbdfcf":"6413","707d2a35":"6633",ed40d5f1:"6712","567cd2fd":"7201","268b7b40":"7234","9530a2cf":"7509","9c5088a2":"7925","9da9112f":"8006",e24ee294:"8040",d13e532e:"8146",e7bd83ca:"8168","2d527871":"8749",a563fa93:"8839","41f327fc":"8966","1518f45a":"9296","4f1ca6a9":"9540","014ce9a3":"10643",e12b4691:"10912",ac581902:"10978",c76e8cac:"11298",fba72e87:"11542","2ece0fbe":"11561","3b5de274":"11977",d05981ec:"12124","6c7ccb6d":"12398","5962ef6e":"12630","75514f6a":"12824","4bea8531":"12840","1b3ab3b8":"13431",ef6ec597:"13482","1b7af47d":"14273",a5ffe972:"14341","1842239f":"14535","90e4432b":"15095","603706f6":"15149","4f81391a":"15165",b7787e72:"15253","1abf1e18":"15343",ac39f7dc:"15404","3a4f9d93":"15487",a9330763:"15511",ebf648dd:"15566",dd29718b:"15579",f3e986ee:"15788","18c7d2a3":"15800","07f6b90a":"15839","0f113696":"16030","76d23901":"16269",bef0d2d3:"16286","3c1f4383":"16551","5b779334":"16833","51dbb02f":"16975","3ea19ec0":"17026","6eddee4c":"17295",ad139e3c:"17538","66d63bfc":"17542",b1706689:"17655",b328a361:"17782","1766722a":"17817",dd3f9ada:"17820","63cd8b22":"17973","445a1b28":"18373",ceeda143:"18443",d84904f9:"18563",c73cfc52:"18825","514f485e":"18828","11c317cb":"18933",c14fdd92:"19042","8bff2444":"19336","548964ce":"19465","15ea8379":"19483","2b30bc14":"19603",b2be3901:"19775","275c0efd":"19950","20e0fe38":"21102","6e4e5cd9":"21401",cb7c2a83:"21933",a34c49d0:"22007",e316de39:"22011","5407376f":"22287",aa5548ab:"22341","8af72580":"22577","49e6864b":"22707","6b40ec54":"22712","88f8e3e1":"22970","052105dd":"23036",c0391845:"23234","4e3910af":"23544",f21c13c4:"23807","92ce4fd5":"23838","35826f14":"23894","2bcda99e":"23941","48329b94":"23979","7aeb6f16":"24046",c5c95e45:"24422","1a3608a9":"24484",b962e007:"24594",e579bad6:"24627","10e13725":"25116",cf666d49:"25449",cee4c3b9:"25844","38d6824e":"25990","295a8e78":"26021","0eaf8468":"26184","03d0d544":"26676",c2e57a1b:"26685","8a1bca9d":"26871",ce40236b:"27497",a07d09c1:"27572","35bdc661":"27813",dc1e40d7:"27991","8d990105":"28456","6e2678b7":"28465",dac82c00:"28559",f1b2dc7f:"28634",b066233f:"28696",d620a661:"28746","6e71bda3":"28955","2d5b0e6a":"28957","9783ff24":"29006","7c007ec6":"29283","1be78505":"29514","905ce8b5":"29535",c82134d8:"29540","166d99f5":"29696",b9f125cc:"29806","39a50b22":"29845","5eddefdd":"30013",b739c60a:"30023","627aefca":"30301","22444eb9":"30335","0a9c6347":"30460","47bbce76":"30464",a6191053:"30927",c95882d3:"31153",fd3b6ea5:"31279","521f9727":"31333","645abc7d":"31836","9fa7e8eb":"31896","561bd03d":"32185","5f17911b":"32599","8ff6772d":"32777","0606f1a5":"32789","5bf420e9":"32855",da508cef:"33064","6a26e359":"33195","5c18deb5":"33358","90f00051":"33631","01587ade":"33988","141dc4a8":"34334","3e1e84da":"34397",e887b273:"34654","0d432c84":"34797","7275f11a":"34940","2c5f0abd":"35015","0daa5b3f":"35024","767a7177":"35040",c3b1d949:"35056","061b38c5":"35124","54287b47":"35328","22f921cc":"35517",c3dac62d:"35537","0637d1e7":"35750","0453ae68":"35939","50af03e4":"36204","6c7d288d":"36224",b0e28723:"36285","589adaf2":"36572",b7226801:"36585",affaa541:"36673",aba932dd:"36698","5ef28062":"36746",b4ae214b:"36773",df01388f:"36828",f027bb15:"36916","1e57c92a":"36958","83586bff":"37001","40b49758":"37069","27de0303":"37334",ef77ef39:"37392","21f47424":"37474",a406e231:"37486","0713a501":"37592","077ca2ec":"37610",d57515f7:"37638","4631a032":"37676","5efd5006":"37742",d9e5e528:"38057","6f8cd013":"38078","012c1c0f":"38261","70b306ba":"38377","327be84b":"38399","16d05917":"38759","5e7c3303":"38781",bf04857e:"38846",b7802ae1:"38859","4b18e5ae":"38962",ae89cd3b:"39139",bb7a48da:"39242","0d6d64cd":"39307","16d8ca91":"39402","4e105cba":"39596","2e93ae0f":"40023",fece3c24:"40367","9e5b788f":"40412","610d430a":"40659","965ed185":"40852","1db1f785":"40950","04804d02":"41607",b66e2100:"41719","391cb159":"41867","79114dfe":"41944","2cec6bcf":"41987","328f3af4":"42037",e3089280:"42283","1789daeb":"42377","31ee0fa3":"42428","6c4c8509":"42522",e21a3367:"42638","3ee83f58":"42802","993c5022":"42815","423540f2":"42852","42736d5f":"42947",de3bb7c3:"42970","1587ba1d":"43156","455aa1bd":"43251","995576e9":"43344","494501bd":"43426","336404dc":"43488","034e6169":"43730",d41234a3:"44177","70fe7dd7":"44178","00ab2cda":"44334",cd8e97a5:"44422",d81d1137:"44490","99e5ffa1":"44504","2d8a44d7":"44534",fe6131e8:"44539","104b9b56":"45282","7649ec47":"45370",bc85b804:"45503","7ad384dc":"45589","8799ef55":"45938",ccc49370:"46103","96cf5ff0":"46135","0fb98d33":"46338","98258c5e":"46559",b4898d44:"46613",c39e3602:"46807","79d79762":"46845",c5775233:"47454","01eb996b":"47552","408524d4":"48007","59ef8022":"48491","25fb872c":"48602","13246fd5":"48807",eed630e0:"49101",efbc69e1:"49196","3e7c5569":"49778","3a7a88af":"49814",fc598b5d:"50002",a81cf8b0:"50040","9c279ae7":"50196",fe2c893b:"50297","97592aac":"50653","069e3c4c":"50745","5c0b440a":"50783",ff428354:"51078","6b9bdd6f":"51105","6a9ad78d":"51317",b4a95996:"51478",e9aab890:"51732",d8d6ba90:"51842","644ff257":"51910","9688a5be":"51918",c4d09a44:"52406","345903d4":"52433","2e5f4263":"52447","814f3328":"52535",a3fd5a6d:"52730","9b020c4a":"52752",ed5c6c48:"52787","12b89d4a":"52804",bf002efe:"53017","943afd32":"53050","86ed1ff2":"53090",b88f1f9b:"53338",a8456d9f:"53572","9e4087bc":"53608",cb5f1a06:"54528",f537d1e9:"54890","76b137bf":"55276",ca0a1bad:"55305",bece3771:"55538","41ee0cff":"55702",ce7af6ae:"55864",cff2e56e:"56373","1293b2b0":"57074",d9837698:"57168","9d095dba":"57231",e2689dfc:"57258","39122aab":"57689",be95fd8f:"57887","15a59c33":"57924",d39aa6d3:"57930","4d31bfb3":"58092",d989c707:"58462","0349e4f0":"58524",b940e0b5:"58738","990f07de":"58755","3bed3e2f":"58846","7c2e1100":"58861",dfb6dfe9:"59076","4801d370":"59394","76bf568a":"59823","403e642d":"60055",f322d2da:"60445",f0649d54:"60880","3ef20ed6":"61407",b5375b6f:"61428",b75118f0:"61442","04fc65cc":"61755",a0d8a1a2:"61778",f38a0cbd:"61829","5cfa133c":"62079","2c98ca3e":"62106","04ee4ab4":"62217",ddacd194:"62465",c38c658c:"62565","4ef2f7cf":"62675",d95a70ea:"62721","52880d18":"62750","08019edc":"62773",bf8a5c8a:"63002","95d9e891":"63099","74091a62":"63196","56543a15":"63402",ed6d544d:"63448","3bbe0f34":"64172",c4f5d8e4:"64195",b7c6295c:"64233","0e3f998d":"64482",d355047f:"64713",fa2e2688:"64927","37ab9beb":"64992","44ff0d07":"65437",c3c516ff:"65752","900b935c":"65942","74659d33":"66167",e5fe80e3:"66419","0fa36e32":"66555","8462374b":"66808","296054a7":"66945",e863531f:"66981",ba9c0924:"67218","99f8fee5":"67287","31bb7274":"67394","888e2229":"67448",a608660b:"67597",faca745a:"67664",db170a23:"67706",ed316aed:"67870",a1824f78:"68070",e84755e1:"68283","104bf0a6":"68314","3201258e":"68526",c991f47b:"68549","18c9c2ba":"68585",fd748117:"68809","57e687e8":"69159","8181e18a":"69226","232d09b0":"69314","3fb29942":"69559",d1246f49:"69966","77c259dd":"70224","7aa7b8e0":"70344","6a07b55e":"70406","2461af4d":"70880","1a215693":"71535","179d57fa":"71580",a877f9e5:"72141","38b375cc":"72249","3d963cd5":"72286","56087ddb":"72293","1ef65056":"72326","13f17286":"72912","5225b7e0":"73249","8e32e44d":"73419","91e9cb67":"73443",af1e70b6:"73602","6a95f87e":"73796","792eae7e":"74275","4997ef4f":"74772","6f479459":"74963","7a1d9beb":"75088",cbc3190d:"75488",e7250a93:"75586",c23b5e26:"75658","3dafa002":"76258",d252041b:"76300","34f00221":"76454","9cb6fa7c":"76515","58486a98":"76572","62983deb":"76843","51878b77":"77225",b35e9998:"77390",d3093636:"77491","50f80512":"77682","39582d99":"77697","5887dc62":"77969","3b75a1c6":"78047","0b416bde":"78579","88e44e99":"78817","00353f68":"79043",efc1978d:"79079","0a954229":"79088",a0434473:"79328","34ff21b6":"79362","935f2afb":"80053","8fd0c721":"80112",b576c645:"80322",e1269457:"80598",ab9a8589:"80707","556ad472":"80751",ee6cea3b:"80829","17f418b7":"80837",b92ffdc6:"80874","2fb6c1b0":"80982",d25cde70:"81102",e1b2b5f3:"81340","00056db7":"81663",e4ba4487:"81787",fa889bee:"81878","1d820955":"81913","01a15f20":"81970",f7580424:"82158","12d85ab5":"82412",e03f5a54:"82569",f228fdc5:"82643","3a1147b5":"82668","62ccfbde":"82681","23af8e3e":"82723",f8597cb0:"82744",e1ed1e14:"82880","48cf7354":"82994",c34b1e36:"83251",fbb011bb:"83378","3e9a7422":"83393",d707d2ed:"83669","866b91a1":"83781","604b2d50":"83790",c2492d56:"84202",f14633e7:"84207","6094981e":"84298",f8aab3ca:"84880","59d9fecc":"84908",f7a32432:"84954",e8b9c2c7:"84979","8eb438b4":"85043",c2e40c34:"85122","321ba9c5":"85332",ffb7f88a:"85433",ee83122d:"85648","56d039a1":"85729","4e044b4e":"85847","4130db01":"86184","7bf16cc6":"86248","3c22aa66":"86482","67e06a8b":"86704",b80b493a:"86856",c40984d1:"87320",b35d004b:"87384","93ba15e0":"87452","66fcb786":"87597",b38eec89:"87684",b28fe732:"87808","5066efb2":"88003","70e8008e":"88838",d535b358:"88984",a2b5e5df:"89220","120352d6":"89262",deef0af8:"89407","83203dd1":"89450","6ff7775f":"89461",a19ce767:"89463",c413b43a:"89767","1dc2f362":"89864","69bb4b11":"90288","4585b70e":"90544","80472ecc":"91078","3ac37a06":"91103",e2ce57ed:"91210",cc52e3cd:"91214",d36f3429:"91387","2137a7cd":"91391","4de42975":"91570","85aed2f9":"91747",bc4bf151:"91755",c90c942b:"91824","8590feaa":"91871","78aebd5d":"91895","8328d740":"91921",dc7f7ac1:"92018","0e0ee9b2":"92045",b33598ac:"92059","325e22bd":"92321",d175df5e:"92805","8a46fce0":"92836","5dbf4619":"92867","3647ac9b":"92982",a6aa9e1f:"93089","318ccb43":"93377",d8b3ac6a:"94330","1f31a275":"94353","0e2bf69e":"94395",e464d112:"94508","53a5cb1e":"94548",d7f58694:"94553","94d743d6":"94579",d0cb74df:"94597",f7bfff73:"94603",e402e26d:"94623","5a3e1bca":"94702","1014102e":"94744","4cee39d7":"94756","82878c84":"94916","109f6864":"94994",f85db098:"95049",e9b75d67:"95064","283b8f99":"95521",bd1bcb2c:"95584","57ef17c5":"95602",d0c5e4fa:"95717",a34b09d4:"95768","8cb11643":"95974","1e14124f":"96350",e5c4ef01:"96673","82d7cc79":"96816","1a62d1af":"96960",c764f2c0:"97030",fe5d19cb:"97065","294c060e":"97165",e0077969:"97297","4db82253":"97381","2a023663":"97715","0e16e9ea":"97721","09990b79":"97800","1a4e3797":"97920","4e432063":"97988",d3c9b0d7:"98050",eb0ef952:"98197",bc6a9944:"98462","7296bed0":"98528",f5b4e517:"98546","6bdbf6b8":"98583","2406587d":"99119","7aa49360":"99147","69dc6d0a":"99161","58b8f176":"99374",efbb59fb:"99378",fc47fc53:"99601",dea122e2:"99781","093b81f4":"99970"}[e]||e,r.p+r.u(e)},(()=>{var e={51303:0,40532:0};r.f.j=(b,f)=>{var d=r.o(e,b)?e[b]:void 0;if(0!==d)if(d)f.push(d[2]);else if(/^(40532|51303)$/.test(b))e[b]=0;else{var a=new Promise(((f,a)=>d=e[b]=[f,a]));f.push(d[2]=a);var c=r.p+r.u(b),t=new Error;r.l(c,(f=>{if(r.o(e,b)&&(0!==(d=e[b])&&(e[b]=void 0),d)){var a=f&&("load"===f.type?"missing":f.type),c=f&&f.target&&f.target.src;t.message="Loading chunk "+b+" failed.\n("+a+": "+c+")",t.name="ChunkLoadError",t.type=a,t.request=c,d[1](t)}}),"chunk-"+b,b)}},r.O.j=b=>0===e[b];var b=(b,f)=>{var d,a,c=f[0],t=f[1],o=f[2],n=0;if(c.some((b=>0!==e[b]))){for(d in t)r.o(t,d)&&(r.m[d]=t[d]);if(o)var i=o(r)}for(b&&b(f);n{"use strict";var e,b,f,d,a,c={},t={};function r(e){var b=t[e];if(void 0!==b)return b.exports;var f=t[e]={exports:{}};return c[e].call(f.exports,f,f.exports,r),f.exports}r.m=c,e=[],r.O=(b,f,d,a)=>{if(!f){var c=1/0;for(i=0;i=a)&&Object.keys(r.O).every((e=>r.O[e](f[o])))?f.splice(o--,1):(t=!1,a0&&e[i-1][2]>a;i--)e[i]=e[i-1];e[i]=[f,d,a]},r.n=e=>{var b=e&&e.__esModule?()=>e.default:()=>e;return r.d(b,{a:b}),b},f=Object.getPrototypeOf?e=>Object.getPrototypeOf(e):e=>e.__proto__,r.t=function(e,d){if(1&d&&(e=this(e)),8&d)return e;if("object"==typeof e&&e){if(4&d&&e.__esModule)return e;if(16&d&&"function"==typeof e.then)return e}var a=Object.create(null);r.r(a);var c={};b=b||[null,f({}),f([]),f(f)];for(var t=2&d&&e;"object"==typeof t&&!~b.indexOf(t);t=f(t))Object.getOwnPropertyNames(t).forEach((b=>c[b]=()=>e[b]));return c.default=()=>e,r.d(a,c),a},r.d=(e,b)=>{for(var f in b)r.o(b,f)&&!r.o(e,f)&&Object.defineProperty(e,f,{enumerable:!0,get:b[f]})},r.f={},r.e=e=>Promise.all(Object.keys(r.f).reduce(((b,f)=>(r.f[f](e,b),b)),[])),r.u=e=>"assets/js/"+({226:"32fe2e34",271:"e8d66fc1",360:"88b63415",390:"001aea4c",403:"7b60d8e3",429:"c4d68582",587:"b83021be",688:"b66ab6b5",787:"20641120",806:"b3272230",847:"73c2022c",939:"0a65b9de",1084:"421d486c",1123:"1f6f5ac9",1172:"255635fc",1283:"f264933a",1286:"fd4bd09d",1773:"3cbfbf75",1802:"513002de",1854:"ad10988d",1985:"368de161",2070:"6c82e982",2195:"b72abe57",2381:"59e4d88e",2408:"09bc1516",2797:"e4e95844",2901:"980b00ea",3202:"6c61f93b",3554:"55203621",3611:"6e159789",3701:"513214d5",3765:"d1428557",3814:"b41f295c",3871:"445012ce",3953:"60a2189a",4026:"73cf52e8",4193:"1ad91055",4204:"ea2d2bcc",4503:"1e0d2d3b",4800:"c065fcb9",5e3:"cf57716c",5253:"405fabd0",5254:"33b3776b",5287:"b8963dc0",5378:"fc338b02",5789:"1155dc6f",5909:"78cb5959",6004:"10b7acf6",6289:"75801678",6304:"5e008bd1",6413:"4bbbdfcf",6633:"707d2a35",6712:"ed40d5f1",7201:"567cd2fd",7234:"268b7b40",7509:"9530a2cf",7925:"9c5088a2",8006:"9da9112f",8040:"e24ee294",8146:"d13e532e",8168:"e7bd83ca",8749:"2d527871",8839:"a563fa93",8966:"41f327fc",9296:"1518f45a",9540:"4f1ca6a9",10643:"014ce9a3",10912:"e12b4691",10978:"ac581902",11298:"c76e8cac",11542:"fba72e87",11561:"2ece0fbe",11977:"3b5de274",12124:"d05981ec",12398:"6c7ccb6d",12630:"5962ef6e",12824:"75514f6a",12840:"4bea8531",13431:"1b3ab3b8",13482:"ef6ec597",14273:"1b7af47d",14341:"a5ffe972",14535:"1842239f",15095:"90e4432b",15149:"603706f6",15165:"4f81391a",15253:"b7787e72",15343:"1abf1e18",15404:"ac39f7dc",15487:"3a4f9d93",15511:"a9330763",15566:"ebf648dd",15579:"dd29718b",15788:"f3e986ee",15800:"18c7d2a3",15839:"07f6b90a",16030:"0f113696",16269:"76d23901",16286:"bef0d2d3",16551:"3c1f4383",16833:"5b779334",16975:"51dbb02f",17026:"3ea19ec0",17295:"6eddee4c",17538:"ad139e3c",17542:"66d63bfc",17655:"b1706689",17782:"b328a361",17817:"1766722a",17820:"dd3f9ada",17973:"63cd8b22",18373:"445a1b28",18443:"ceeda143",18563:"d84904f9",18825:"c73cfc52",18828:"514f485e",18933:"11c317cb",19042:"c14fdd92",19336:"8bff2444",19465:"548964ce",19483:"15ea8379",19603:"2b30bc14",19775:"b2be3901",19950:"275c0efd",21102:"20e0fe38",21401:"6e4e5cd9",21933:"cb7c2a83",22007:"a34c49d0",22011:"e316de39",22287:"5407376f",22341:"aa5548ab",22577:"8af72580",22707:"49e6864b",22712:"6b40ec54",22970:"88f8e3e1",23036:"052105dd",23234:"c0391845",23544:"4e3910af",23807:"f21c13c4",23838:"92ce4fd5",23894:"35826f14",23941:"2bcda99e",23979:"48329b94",24046:"7aeb6f16",24422:"c5c95e45",24484:"1a3608a9",24594:"b962e007",24627:"e579bad6",25116:"10e13725",25449:"cf666d49",25844:"cee4c3b9",25990:"38d6824e",26021:"295a8e78",26184:"0eaf8468",26676:"03d0d544",26685:"c2e57a1b",26871:"8a1bca9d",27497:"ce40236b",27572:"a07d09c1",27813:"35bdc661",27918:"17896441",27991:"dc1e40d7",28456:"8d990105",28465:"6e2678b7",28559:"dac82c00",28634:"f1b2dc7f",28696:"b066233f",28746:"d620a661",28955:"6e71bda3",28957:"2d5b0e6a",29006:"9783ff24",29283:"7c007ec6",29514:"1be78505",29535:"905ce8b5",29540:"c82134d8",29696:"166d99f5",29806:"b9f125cc",29845:"39a50b22",30013:"5eddefdd",30023:"b739c60a",30301:"627aefca",30335:"22444eb9",30460:"0a9c6347",30464:"47bbce76",30927:"a6191053",31153:"c95882d3",31279:"fd3b6ea5",31333:"521f9727",31836:"645abc7d",31896:"9fa7e8eb",32185:"561bd03d",32599:"5f17911b",32777:"8ff6772d",32789:"0606f1a5",32855:"5bf420e9",33064:"da508cef",33195:"6a26e359",33358:"5c18deb5",33631:"90f00051",33988:"01587ade",34334:"141dc4a8",34397:"3e1e84da",34654:"e887b273",34797:"0d432c84",34940:"7275f11a",35015:"2c5f0abd",35024:"0daa5b3f",35040:"767a7177",35056:"c3b1d949",35124:"061b38c5",35328:"54287b47",35517:"22f921cc",35537:"c3dac62d",35750:"0637d1e7",35939:"0453ae68",36005:"90742975",36204:"50af03e4",36224:"6c7d288d",36285:"b0e28723",36572:"589adaf2",36585:"b7226801",36673:"affaa541",36698:"aba932dd",36746:"5ef28062",36773:"b4ae214b",36828:"df01388f",36916:"f027bb15",36958:"1e57c92a",37001:"83586bff",37069:"40b49758",37334:"27de0303",37392:"ef77ef39",37474:"21f47424",37486:"a406e231",37592:"0713a501",37610:"077ca2ec",37638:"d57515f7",37676:"4631a032",37742:"5efd5006",38057:"d9e5e528",38078:"6f8cd013",38261:"012c1c0f",38377:"70b306ba",38399:"327be84b",38759:"16d05917",38781:"5e7c3303",38846:"bf04857e",38859:"b7802ae1",38962:"4b18e5ae",39139:"ae89cd3b",39242:"bb7a48da",39307:"0d6d64cd",39402:"16d8ca91",39596:"4e105cba",40023:"2e93ae0f",40367:"fece3c24",40412:"9e5b788f",40659:"610d430a",40852:"965ed185",40950:"1db1f785",41607:"04804d02",41719:"b66e2100",41867:"391cb159",41944:"79114dfe",41987:"2cec6bcf",42037:"328f3af4",42283:"e3089280",42377:"1789daeb",42428:"31ee0fa3",42522:"6c4c8509",42638:"e21a3367",42802:"3ee83f58",42815:"993c5022",42852:"423540f2",42947:"42736d5f",42970:"de3bb7c3",43156:"1587ba1d",43251:"455aa1bd",43344:"995576e9",43426:"494501bd",43488:"336404dc",43730:"034e6169",44177:"d41234a3",44178:"70fe7dd7",44334:"00ab2cda",44422:"cd8e97a5",44490:"d81d1137",44504:"99e5ffa1",44534:"2d8a44d7",44539:"fe6131e8",45282:"104b9b56",45370:"7649ec47",45503:"bc85b804",45589:"7ad384dc",45938:"8799ef55",46103:"ccc49370",46135:"96cf5ff0",46338:"0fb98d33",46559:"98258c5e",46613:"b4898d44",46807:"c39e3602",46845:"79d79762",47454:"c5775233",47552:"01eb996b",48007:"408524d4",48491:"59ef8022",48602:"25fb872c",48807:"13246fd5",49101:"eed630e0",49196:"efbc69e1",49778:"3e7c5569",49814:"3a7a88af",50002:"fc598b5d",50040:"a81cf8b0",50196:"9c279ae7",50297:"fe2c893b",50653:"97592aac",50745:"069e3c4c",50783:"5c0b440a",51078:"ff428354",51105:"6b9bdd6f",51317:"6a9ad78d",51478:"b4a95996",51732:"e9aab890",51842:"d8d6ba90",51910:"644ff257",51918:"9688a5be",52406:"c4d09a44",52433:"345903d4",52447:"2e5f4263",52535:"814f3328",52730:"a3fd5a6d",52752:"9b020c4a",52787:"ed5c6c48",52804:"12b89d4a",53017:"bf002efe",53050:"943afd32",53090:"86ed1ff2",53338:"b88f1f9b",53572:"a8456d9f",53608:"9e4087bc",54528:"cb5f1a06",54890:"f537d1e9",55276:"76b137bf",55305:"ca0a1bad",55538:"bece3771",55702:"41ee0cff",55864:"ce7af6ae",56373:"cff2e56e",57074:"1293b2b0",57168:"d9837698",57231:"9d095dba",57258:"e2689dfc",57689:"39122aab",57887:"be95fd8f",57924:"15a59c33",57930:"d39aa6d3",58092:"4d31bfb3",58462:"d989c707",58524:"0349e4f0",58738:"b940e0b5",58755:"990f07de",58846:"3bed3e2f",58861:"7c2e1100",59076:"dfb6dfe9",59394:"4801d370",59484:"71042800",59823:"76bf568a",60055:"403e642d",60445:"f322d2da",60880:"f0649d54",61407:"3ef20ed6",61428:"b5375b6f",61442:"b75118f0",61755:"04fc65cc",61778:"a0d8a1a2",61829:"f38a0cbd",62079:"5cfa133c",62106:"2c98ca3e",62217:"04ee4ab4",62465:"ddacd194",62565:"c38c658c",62675:"4ef2f7cf",62721:"d95a70ea",62750:"52880d18",62773:"08019edc",63002:"bf8a5c8a",63099:"95d9e891",63196:"74091a62",63402:"56543a15",63448:"ed6d544d",64172:"3bbe0f34",64195:"c4f5d8e4",64233:"b7c6295c",64482:"0e3f998d",64713:"d355047f",64927:"fa2e2688",64992:"37ab9beb",65437:"44ff0d07",65752:"c3c516ff",65942:"900b935c",66167:"74659d33",66419:"e5fe80e3",66555:"0fa36e32",66808:"8462374b",66945:"296054a7",66981:"e863531f",67218:"ba9c0924",67287:"99f8fee5",67394:"31bb7274",67448:"888e2229",67597:"a608660b",67664:"faca745a",67706:"db170a23",67870:"ed316aed",68070:"a1824f78",68283:"e84755e1",68314:"104bf0a6",68526:"3201258e",68549:"c991f47b",68585:"18c9c2ba",68809:"fd748117",69159:"57e687e8",69226:"8181e18a",69314:"232d09b0",69559:"3fb29942",69966:"d1246f49",70224:"77c259dd",70344:"7aa7b8e0",70406:"6a07b55e",70880:"2461af4d",71535:"1a215693",71580:"179d57fa",72141:"a877f9e5",72249:"38b375cc",72286:"3d963cd5",72293:"56087ddb",72326:"1ef65056",72912:"13f17286",73060:"75406112",73249:"5225b7e0",73419:"8e32e44d",73443:"91e9cb67",73602:"af1e70b6",73796:"6a95f87e",74275:"792eae7e",74772:"4997ef4f",74963:"6f479459",75088:"7a1d9beb",75488:"cbc3190d",75586:"e7250a93",75658:"c23b5e26",76258:"3dafa002",76300:"d252041b",76454:"34f00221",76515:"9cb6fa7c",76572:"58486a98",76843:"62983deb",77225:"51878b77",77390:"b35e9998",77491:"d3093636",77682:"50f80512",77697:"39582d99",77969:"5887dc62",78047:"3b75a1c6",78480:"26631026",78579:"0b416bde",78817:"88e44e99",79043:"00353f68",79079:"efc1978d",79088:"0a954229",79328:"a0434473",79362:"34ff21b6",80053:"935f2afb",80112:"8fd0c721",80322:"b576c645",80598:"e1269457",80707:"ab9a8589",80751:"556ad472",80829:"ee6cea3b",80837:"17f418b7",80874:"b92ffdc6",80982:"2fb6c1b0",81102:"d25cde70",81340:"e1b2b5f3",81663:"00056db7",81787:"e4ba4487",81878:"fa889bee",81913:"1d820955",81970:"01a15f20",82158:"f7580424",82412:"12d85ab5",82569:"e03f5a54",82643:"f228fdc5",82668:"3a1147b5",82681:"62ccfbde",82723:"23af8e3e",82744:"f8597cb0",82880:"e1ed1e14",82994:"48cf7354",83251:"c34b1e36",83378:"fbb011bb",83393:"3e9a7422",83669:"d707d2ed",83781:"866b91a1",83790:"604b2d50",84202:"c2492d56",84207:"f14633e7",84298:"6094981e",84880:"f8aab3ca",84908:"59d9fecc",84954:"f7a32432",84979:"e8b9c2c7",85043:"8eb438b4",85122:"c2e40c34",85332:"321ba9c5",85433:"ffb7f88a",85648:"ee83122d",85729:"56d039a1",85847:"4e044b4e",86184:"4130db01",86248:"7bf16cc6",86482:"3c22aa66",86704:"67e06a8b",86856:"b80b493a",87320:"c40984d1",87384:"b35d004b",87452:"93ba15e0",87597:"66fcb786",87684:"b38eec89",87808:"b28fe732",88003:"5066efb2",88838:"70e8008e",88984:"d535b358",89220:"a2b5e5df",89262:"120352d6",89407:"deef0af8",89450:"83203dd1",89461:"6ff7775f",89463:"a19ce767",89767:"c413b43a",89864:"1dc2f362",90288:"69bb4b11",90544:"4585b70e",91078:"80472ecc",91103:"3ac37a06",91210:"e2ce57ed",91214:"cc52e3cd",91387:"d36f3429",91391:"2137a7cd",91570:"4de42975",91747:"85aed2f9",91755:"bc4bf151",91824:"c90c942b",91871:"8590feaa",91895:"78aebd5d",91921:"8328d740",92018:"dc7f7ac1",92045:"0e0ee9b2",92059:"b33598ac",92321:"325e22bd",92805:"d175df5e",92836:"8a46fce0",92867:"5dbf4619",92982:"3647ac9b",93089:"a6aa9e1f",93377:"318ccb43",94330:"d8b3ac6a",94353:"1f31a275",94395:"0e2bf69e",94508:"e464d112",94548:"53a5cb1e",94553:"d7f58694",94579:"94d743d6",94597:"d0cb74df",94603:"f7bfff73",94623:"e402e26d",94702:"5a3e1bca",94744:"1014102e",94756:"4cee39d7",94916:"82878c84",94994:"109f6864",95049:"f85db098",95064:"e9b75d67",95521:"283b8f99",95584:"bd1bcb2c",95602:"57ef17c5",95717:"d0c5e4fa",95768:"a34b09d4",95974:"8cb11643",96350:"1e14124f",96673:"e5c4ef01",96816:"82d7cc79",96960:"1a62d1af",97030:"c764f2c0",97065:"fe5d19cb",97165:"294c060e",97297:"e0077969",97381:"4db82253",97715:"2a023663",97721:"0e16e9ea",97800:"09990b79",97920:"1a4e3797",97988:"4e432063",98050:"d3c9b0d7",98197:"eb0ef952",98462:"bc6a9944",98528:"7296bed0",98546:"f5b4e517",98583:"6bdbf6b8",99119:"2406587d",99128:"15516451",99147:"7aa49360",99161:"69dc6d0a",99374:"58b8f176",99378:"efbb59fb",99601:"fc47fc53",99781:"dea122e2",99970:"093b81f4"}[e]||e)+"."+{226:"7c60e348",271:"b472ae37",360:"0c049a9a",390:"16907556",403:"4c6e2b65",429:"c530f0a0",587:"25e7c11f",688:"feefe5ad",787:"4d89b7a8",806:"3689025c",847:"c07433f0",939:"dd4545ed",1084:"0194791b",1123:"640e13d4",1172:"105f1ab1",1283:"7a0aa43b",1286:"6ad5a370",1773:"3a54b67d",1802:"613317d6",1854:"c62a83e1",1985:"a23ad2e8",2070:"1b8bc6df",2195:"a42749a0",2381:"a9df8fd6",2408:"f68bf61a",2797:"ec2ce8dc",2901:"f0462d81",3202:"83225189",3554:"c9c5c25c",3611:"84cee5cb",3701:"ebe9d132",3765:"c7b94ddd",3814:"a51776da",3871:"3d351853",3953:"aa3ad659",4026:"91ffb89c",4193:"0c39c6fd",4204:"2942d62d",4503:"7ec496a3",4800:"f4fea7cb",5e3:"c4a07252",5253:"58f5ca42",5254:"d9a92894",5287:"b75edc26",5378:"be4cc3ff",5789:"294730b0",5909:"2b553671",6004:"fbfda01a",6289:"7e9ad05f",6304:"e1bb2c7e",6413:"406cd688",6633:"eeed6b75",6712:"11e439bd",7201:"97d9fe39",7234:"61faaca9",7509:"e0320a5b",7925:"45be218c",8006:"696da333",8040:"24249473",8146:"e8787811",8168:"805ffacc",8749:"fd6620c4",8839:"ef3c8cef",8966:"d111deb6",9296:"415afda5",9540:"fd3585d8",10643:"9ac2966d",10912:"5766341c",10978:"fa5141d9",11298:"a8bed199",11542:"4e543c37",11561:"4f426cb6",11977:"4f629e6e",12124:"92c4c2bb",12398:"ab17108f",12630:"89d0fd21",12824:"deab964d",12840:"15a46ad4",13431:"5b3464d7",13482:"f117cd47",14273:"ed1b0a2e",14341:"5f31f2f7",14535:"e5b29187",15095:"1ee08781",15149:"b1d50f20",15165:"9fc5aa61",15253:"eb95bb49",15343:"b4f9c1d7",15404:"d4e54d08",15487:"e356abfe",15511:"8de44ef4",15566:"7af2a42d",15579:"80d18a93",15788:"aaa06e2d",15800:"0daa1137",15839:"f15dc558",16030:"a1d2c595",16269:"2a5ae6a5",16286:"3091a1cb",16551:"8334c80a",16833:"ceeed8df",16975:"25f84d8d",17026:"40a33067",17295:"e7002340",17538:"68b2fa1b",17542:"b345b319",17655:"caf41e88",17782:"9af38353",17817:"7a60488d",17820:"7a974bf8",17973:"49a55e91",18373:"3b52ab84",18443:"8ed83508",18563:"a650d3a8",18825:"edd2110a",18828:"61dff53a",18894:"8ba79e33",18933:"82ff982d",19042:"f0e3a658",19336:"7f5c5c75",19465:"ba302a23",19483:"a04abc04",19603:"63140d8c",19775:"53d720b7",19950:"8e6a0524",21102:"e64026a7",21401:"25b8730b",21933:"284bd8a5",22007:"3c7d5195",22011:"12fd87b1",22287:"c1ff4071",22341:"a7c000a6",22577:"12ecb1e2",22707:"969377ed",22712:"204c8f47",22970:"02f3aa52",23036:"ba928913",23234:"f0bddc20",23544:"a2c9c406",23807:"f8ab332b",23838:"f96fd2ce",23894:"4a36373d",23941:"7eb4157a",23979:"da7d92d4",24046:"c95f1e59",24422:"14588453",24484:"290578f8",24572:"7376a9ae",24594:"aa995eb4",24627:"ad35e092",25116:"cadea3bd",25449:"28595d1a",25844:"67b2a5fc",25990:"d1dad3f6",26021:"6b9ce1ea",26184:"d1fb0dee",26676:"e6e7e7a4",26685:"04f76c9c",26871:"25fabaa5",27497:"8be44fac",27572:"92e50813",27813:"7670b946",27918:"1dd7c1ea",27991:"5af43466",28456:"42e1f8bb",28465:"bae03a70",28559:"53d784db",28634:"07a76afe",28696:"03d2fb68",28746:"258f03a6",28955:"3d9432bb",28957:"5c5fc2a0",29006:"bcf81dc8",29283:"a12f8681",29514:"216cf5d7",29535:"75ea1aee",29540:"3ae661ae",29696:"09a48be2",29806:"e13efecc",29845:"6229c1ae",30013:"9156458e",30023:"f70eac19",30301:"c4bbb92d",30335:"ebddc206",30460:"c79f944d",30464:"cfaedc94",30927:"5f7ddb44",31153:"c74b2845",31279:"771e833b",31333:"714da6d3",31836:"6afbe490",31896:"cce4e3d2",32185:"242c68ad",32599:"6ee262cb",32777:"9a4fbe82",32789:"d5b89e35",32855:"ab34218c",33064:"2b15599a",33195:"23458ac7",33358:"6875ed7c",33631:"a5a75b4b",33988:"0caae640",34334:"3a1990ef",34397:"50e63b4b",34654:"96dfebcb",34797:"cf49ef0b",34940:"7bb97a11",35015:"90bcb885",35024:"77f9b663",35040:"ba559dd9",35056:"6d5d38eb",35124:"6cc60e98",35328:"2329ba8e",35517:"5666354f",35537:"27d90dc8",35750:"064379cf",35939:"4fca9411",36005:"07beb68e",36204:"1b5a72e3",36224:"169580d7",36285:"770abe5c",36572:"3b4a629e",36585:"1717b0cf",36673:"6015f67e",36698:"24aeb102",36746:"b6d58204",36773:"a3eb6850",36828:"8ab9b559",36916:"627e234a",36958:"2b54b1f9",37001:"d663eac3",37069:"0133d096",37334:"bd502a3e",37392:"3674eb36",37474:"8fc13a5a",37486:"be80e33d",37592:"84e46938",37610:"5ca2e1ca",37638:"6fbb6657",37676:"98d2551a",37742:"a5e70451",37853:"6b0071e3",38057:"0aa70e90",38078:"eaa3b4f7",38261:"3e7aa5d5",38377:"bd09de84",38399:"4a04eae7",38759:"873ccf3d",38781:"2f3351a0",38846:"88bd3c18",38859:"7ccc674c",38962:"0236cee6",39139:"6f56c354",39242:"40e4acd1",39307:"5d7232c1",39402:"d6599b0a",39596:"511860eb",40023:"caff10d8",40367:"8325e1db",40412:"8ecdd605",40659:"c8eb504a",40852:"b29206c4",40950:"b8c4e793",41607:"38ca02bf",41719:"53c08f2d",41867:"7db460cb",41944:"4ec7bb98",41987:"0609e7ff",42037:"ad3a366f",42283:"a34f8074",42377:"4d69dc8d",42428:"7e0359d6",42522:"096248d0",42638:"082230ce",42802:"91ab538d",42815:"971a7a1a",42852:"bd015a5e",42947:"173cf0a5",42970:"0ca4866d",43156:"6e858d37",43251:"002f23aa",43344:"a5f5a399",43426:"97ac4633",43488:"3bcd1836",43730:"a22706a2",44177:"a7c47f47",44178:"6168e9e1",44334:"7f2d19ca",44422:"e1c7affe",44490:"901cceab",44504:"b8b106d8",44534:"7f1b5fc9",44539:"558a2d88",45282:"1eae2722",45370:"0d930dbf",45503:"a72ce2a2",45589:"7f42a2b5",45938:"ee06af10",46048:"f55105b0",46103:"4043289d",46135:"546126d5",46338:"073f7a96",46559:"d0e09414",46613:"0384012a",46807:"62a2c61f",46845:"4deeece5",46945:"fd128107",47454:"2e3cc69a",47552:"96f02664",48007:"1f47865f",48491:"2a20a978",48602:"f679dc4f",48807:"d5a3f483",49101:"a98291dd",49196:"867a0f29",49778:"cd04be09",49814:"cbb3c3ac",50002:"0471b2c6",50040:"e195e324",50196:"9f0c9a57",50297:"4da5326f",50653:"288194f1",50745:"fc78265d",50783:"3c16a52b",51078:"23039dab",51105:"9273b6ea",51317:"ff17fe39",51478:"66075e47",51732:"ea31977e",51842:"a88f573c",51910:"eff31342",51918:"0614f4d5",52406:"4d053caf",52433:"30590104",52447:"d06ddc63",52535:"b03595a3",52730:"7a6b4a14",52752:"355782e7",52787:"838e0402",52804:"eefbdfaa",53017:"9348a25f",53050:"6abd1a54",53090:"75338eaf",53140:"a29368cb",53338:"083cbf6d",53572:"7effd19a",53608:"72da96df",54528:"ff5b94ed",54890:"af1bc49c",55276:"facd650b",55305:"1e659f3b",55538:"81957284",55702:"24761a8e",55864:"16c1759c",56373:"f0ab3974",57074:"b14d0343",57168:"9a260310",57231:"f08ac08d",57258:"87fca17c",57689:"0759cd84",57887:"3c00aa5f",57924:"929e4f47",57930:"1e91e4cb",58092:"80d71d4e",58462:"ef15d1f1",58524:"b857b4f3",58738:"43f88bbc",58755:"70e7bcad",58846:"b5894e92",58861:"99c57d57",59076:"823e2f17",59394:"989a950f",59484:"edba3628",59823:"c4a7067d",60055:"fb65aa03",60445:"deb5d152",60880:"4828013e",61407:"8dcb13c6",61428:"d1a4d228",61442:"6214188f",61755:"85679afc",61778:"42efbba1",61829:"79c4438f",62079:"3d50e52d",62106:"b11711e1",62217:"f597b4a5",62465:"26755e65",62565:"7331c10f",62675:"0c1bfb62",62721:"a55c8bbb",62750:"e185638d",62773:"35944844",63002:"cce5fea8",63099:"11b155f6",63196:"3a5e22f4",63402:"9a4cd328",63448:"9766b089",64172:"85cd1357",64195:"13fd239b",64233:"871b4ef9",64482:"d6d86e59",64713:"ce848f45",64927:"0b0a4e1c",64992:"2852555a",65437:"08ba8e6c",65752:"d9a972a6",65942:"3ac80a08",66167:"22cd1bfb",66419:"e6a0b17c",66555:"4058ffc7",66808:"061aa888",66945:"1e9742d3",66981:"42ac147f",67218:"4d5c911a",67287:"30019ebd",67394:"0c477410",67448:"ea219a5e",67597:"1f3a4c28",67664:"1f13f78c",67706:"b758af82",67870:"668faecc",68070:"5466f725",68283:"90b5bff2",68314:"5990284f",68526:"93b7d091",68549:"c424522e",68585:"01ca353e",68809:"34affa88",69159:"e2f680f0",69226:"ec923500",69314:"dc5f5244",69559:"fc9487d1",69966:"008e5e0b",70224:"02e885c1",70344:"6ee9dc6c",70406:"5572fd3b",70880:"660ff070",71535:"d1cced29",71580:"60736693",72141:"3c8ed086",72249:"833d6d75",72286:"97cc98df",72293:"5635000a",72326:"24f71dd5",72912:"622d8a7f",73060:"a4a4b44f",73249:"6b863ffe",73419:"78729bc5",73443:"f064c580",73602:"68c9c4d9",73796:"52f85e79",74275:"04909339",74772:"fe2304b2",74963:"3f156112",75088:"4d3332b5",75488:"80bb8867",75586:"ced09747",75658:"39b67445",76258:"e4fa02c3",76300:"de74179d",76454:"392d130c",76515:"09d48160",76572:"844b09b1",76780:"9fcdbad9",76843:"1987905e",77225:"d02aa824",77390:"a83939c0",77491:"28d99640",77682:"dd07edcb",77697:"8ab49adf",77969:"f101a58f",78047:"17c85fed",78480:"6094a26e",78579:"29367017",78817:"861d7487",79043:"9386a8fc",79079:"550ded59",79088:"4dea5ba6",79328:"01e88565",79362:"0185cc0f",80053:"4c83d661",80112:"fd699d0f",80322:"c2a5fb58",80598:"9604dd74",80707:"693fd0ce",80751:"17b0c135",80829:"a2ebc88c",80837:"842e3aa3",80874:"bf4d4db4",80982:"3a6c096a",81102:"ddb287b3",81340:"c70c4ed4",81663:"16985740",81787:"e94876f2",81878:"5025bbc2",81913:"45c6b533",81970:"f817585c",82158:"34c7f206",82412:"06ef2967",82569:"b8197956",82643:"aab4adeb",82668:"86174e6f",82681:"431ff015",82723:"2533d7d1",82744:"251a5009",82880:"e515703a",82994:"db964a35",83251:"5b905490",83378:"30d16d45",83393:"b4209ab4",83669:"d3240ea5",83781:"40717c05",83790:"eaac0795",84202:"3d5cb62e",84207:"75958044",84298:"a9f205e4",84880:"ee402f90",84908:"2baaa7ee",84954:"a71c1740",84979:"2a25ce30",85043:"f98b3ebd",85122:"e3efa00f",85332:"4a6b1343",85433:"81df0347",85648:"e46388c0",85729:"0a998fbe",85847:"0064d3f3",86184:"ce1a31c9",86248:"41094952",86482:"79b92687",86704:"4f81ddb2",86856:"985584fd",87320:"e9b88331",87384:"6d8d8c86",87452:"b3e364f6",87597:"41108fd6",87684:"e7b3125d",87780:"f43ae5ba",87808:"37b0acfa",88003:"4b4a7a4c",88838:"1a748968",88984:"3ff08287",89220:"e11e9cc5",89262:"0e62ec09",89407:"e589fd66",89450:"f36475fd",89461:"2a186faa",89463:"fcc8cf1f",89767:"5f64aaac",89864:"da57dee1",90288:"f854e6db",90544:"ebad36c0",91078:"8a9dd085",91103:"634f6370",91210:"f250c9ac",91214:"9600caec",91387:"50b3c0bf",91391:"6e8550b2",91570:"c5bc4d89",91747:"1df1ce06",91755:"efaa05fe",91824:"d2381236",91871:"264fce60",91895:"7233fcae",91921:"0f73e2cb",92018:"18420d4b",92045:"b7082472",92059:"3cb43b94",92321:"a13df7c4",92805:"bc260ba1",92836:"e6ce93fd",92867:"86a6b9e8",92982:"902b0046",93089:"5241fd92",93377:"aa0a6603",94330:"c6afff6b",94353:"a5690af9",94395:"8bb62ead",94508:"a7b3b267",94548:"7223c440",94553:"49648640",94579:"17f5fc3c",94597:"985993f6",94603:"107f7123",94623:"552eed53",94702:"239c52b1",94744:"1273506a",94756:"e8ca3715",94916:"ba81f68e",94994:"e5301c26",95049:"1d1e73a8",95064:"a0f894d7",95521:"64a26515",95584:"9c262440",95602:"279c2c7b",95717:"2a4b9c2d",95768:"92ce7313",95974:"53f9c073",96350:"146afc64",96673:"1c5fb002",96816:"d0dd057b",96960:"05a860df",97030:"47d88247",97065:"6050493a",97165:"c0ed39cb",97297:"2ae26e66",97381:"3bd2df9d",97715:"61cec5c2",97721:"489ca716",97800:"28867a86",97920:"b813a053",97988:"956d5ccf",98050:"0cdbfd5b",98197:"69b26a94",98462:"9f99c59e",98528:"f7d9cf13",98546:"238da816",98583:"1cb4799b",99119:"8e792955",99128:"abb2247b",99147:"2e0f9fd2",99161:"3e7e608f",99374:"4be47182",99378:"e5729889",99601:"3a075b02",99781:"0dcdc1ac",99970:"5f073138"}[e]+".js",r.miniCssF=e=>{},r.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(e){if("object"==typeof window)return window}}(),r.o=(e,b)=>Object.prototype.hasOwnProperty.call(e,b),d={},a="synapseml:",r.l=(e,b,f,c)=>{if(d[e])d[e].push(b);else{var t,o;if(void 0!==f)for(var n=document.getElementsByTagName("script"),i=0;i{t.onerror=t.onload=null,clearTimeout(s);var a=d[e];if(delete d[e],t.parentNode&&t.parentNode.removeChild(t),a&&a.forEach((e=>e(f))),b)return b(f)},s=setTimeout(l.bind(null,void 0,{type:"timeout",target:t}),12e4);t.onerror=l.bind(null,t.onerror),t.onload=l.bind(null,t.onload),o&&document.head.appendChild(t)}},r.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},r.p="/SynapseML/",r.gca=function(e){return e={15516451:"99128",17896441:"27918",20641120:"787",26631026:"78480",55203621:"3554",71042800:"59484",75406112:"73060",75801678:"6289",90742975:"36005","32fe2e34":"226",e8d66fc1:"271","88b63415":"360","001aea4c":"390","7b60d8e3":"403",c4d68582:"429",b83021be:"587",b66ab6b5:"688",b3272230:"806","73c2022c":"847","0a65b9de":"939","421d486c":"1084","1f6f5ac9":"1123","255635fc":"1172",f264933a:"1283",fd4bd09d:"1286","3cbfbf75":"1773","513002de":"1802",ad10988d:"1854","368de161":"1985","6c82e982":"2070",b72abe57:"2195","59e4d88e":"2381","09bc1516":"2408",e4e95844:"2797","980b00ea":"2901","6c61f93b":"3202","6e159789":"3611","513214d5":"3701",d1428557:"3765",b41f295c:"3814","445012ce":"3871","60a2189a":"3953","73cf52e8":"4026","1ad91055":"4193",ea2d2bcc:"4204","1e0d2d3b":"4503",c065fcb9:"4800",cf57716c:"5000","405fabd0":"5253","33b3776b":"5254",b8963dc0:"5287",fc338b02:"5378","1155dc6f":"5789","78cb5959":"5909","10b7acf6":"6004","5e008bd1":"6304","4bbbdfcf":"6413","707d2a35":"6633",ed40d5f1:"6712","567cd2fd":"7201","268b7b40":"7234","9530a2cf":"7509","9c5088a2":"7925","9da9112f":"8006",e24ee294:"8040",d13e532e:"8146",e7bd83ca:"8168","2d527871":"8749",a563fa93:"8839","41f327fc":"8966","1518f45a":"9296","4f1ca6a9":"9540","014ce9a3":"10643",e12b4691:"10912",ac581902:"10978",c76e8cac:"11298",fba72e87:"11542","2ece0fbe":"11561","3b5de274":"11977",d05981ec:"12124","6c7ccb6d":"12398","5962ef6e":"12630","75514f6a":"12824","4bea8531":"12840","1b3ab3b8":"13431",ef6ec597:"13482","1b7af47d":"14273",a5ffe972:"14341","1842239f":"14535","90e4432b":"15095","603706f6":"15149","4f81391a":"15165",b7787e72:"15253","1abf1e18":"15343",ac39f7dc:"15404","3a4f9d93":"15487",a9330763:"15511",ebf648dd:"15566",dd29718b:"15579",f3e986ee:"15788","18c7d2a3":"15800","07f6b90a":"15839","0f113696":"16030","76d23901":"16269",bef0d2d3:"16286","3c1f4383":"16551","5b779334":"16833","51dbb02f":"16975","3ea19ec0":"17026","6eddee4c":"17295",ad139e3c:"17538","66d63bfc":"17542",b1706689:"17655",b328a361:"17782","1766722a":"17817",dd3f9ada:"17820","63cd8b22":"17973","445a1b28":"18373",ceeda143:"18443",d84904f9:"18563",c73cfc52:"18825","514f485e":"18828","11c317cb":"18933",c14fdd92:"19042","8bff2444":"19336","548964ce":"19465","15ea8379":"19483","2b30bc14":"19603",b2be3901:"19775","275c0efd":"19950","20e0fe38":"21102","6e4e5cd9":"21401",cb7c2a83:"21933",a34c49d0:"22007",e316de39:"22011","5407376f":"22287",aa5548ab:"22341","8af72580":"22577","49e6864b":"22707","6b40ec54":"22712","88f8e3e1":"22970","052105dd":"23036",c0391845:"23234","4e3910af":"23544",f21c13c4:"23807","92ce4fd5":"23838","35826f14":"23894","2bcda99e":"23941","48329b94":"23979","7aeb6f16":"24046",c5c95e45:"24422","1a3608a9":"24484",b962e007:"24594",e579bad6:"24627","10e13725":"25116",cf666d49:"25449",cee4c3b9:"25844","38d6824e":"25990","295a8e78":"26021","0eaf8468":"26184","03d0d544":"26676",c2e57a1b:"26685","8a1bca9d":"26871",ce40236b:"27497",a07d09c1:"27572","35bdc661":"27813",dc1e40d7:"27991","8d990105":"28456","6e2678b7":"28465",dac82c00:"28559",f1b2dc7f:"28634",b066233f:"28696",d620a661:"28746","6e71bda3":"28955","2d5b0e6a":"28957","9783ff24":"29006","7c007ec6":"29283","1be78505":"29514","905ce8b5":"29535",c82134d8:"29540","166d99f5":"29696",b9f125cc:"29806","39a50b22":"29845","5eddefdd":"30013",b739c60a:"30023","627aefca":"30301","22444eb9":"30335","0a9c6347":"30460","47bbce76":"30464",a6191053:"30927",c95882d3:"31153",fd3b6ea5:"31279","521f9727":"31333","645abc7d":"31836","9fa7e8eb":"31896","561bd03d":"32185","5f17911b":"32599","8ff6772d":"32777","0606f1a5":"32789","5bf420e9":"32855",da508cef:"33064","6a26e359":"33195","5c18deb5":"33358","90f00051":"33631","01587ade":"33988","141dc4a8":"34334","3e1e84da":"34397",e887b273:"34654","0d432c84":"34797","7275f11a":"34940","2c5f0abd":"35015","0daa5b3f":"35024","767a7177":"35040",c3b1d949:"35056","061b38c5":"35124","54287b47":"35328","22f921cc":"35517",c3dac62d:"35537","0637d1e7":"35750","0453ae68":"35939","50af03e4":"36204","6c7d288d":"36224",b0e28723:"36285","589adaf2":"36572",b7226801:"36585",affaa541:"36673",aba932dd:"36698","5ef28062":"36746",b4ae214b:"36773",df01388f:"36828",f027bb15:"36916","1e57c92a":"36958","83586bff":"37001","40b49758":"37069","27de0303":"37334",ef77ef39:"37392","21f47424":"37474",a406e231:"37486","0713a501":"37592","077ca2ec":"37610",d57515f7:"37638","4631a032":"37676","5efd5006":"37742",d9e5e528:"38057","6f8cd013":"38078","012c1c0f":"38261","70b306ba":"38377","327be84b":"38399","16d05917":"38759","5e7c3303":"38781",bf04857e:"38846",b7802ae1:"38859","4b18e5ae":"38962",ae89cd3b:"39139",bb7a48da:"39242","0d6d64cd":"39307","16d8ca91":"39402","4e105cba":"39596","2e93ae0f":"40023",fece3c24:"40367","9e5b788f":"40412","610d430a":"40659","965ed185":"40852","1db1f785":"40950","04804d02":"41607",b66e2100:"41719","391cb159":"41867","79114dfe":"41944","2cec6bcf":"41987","328f3af4":"42037",e3089280:"42283","1789daeb":"42377","31ee0fa3":"42428","6c4c8509":"42522",e21a3367:"42638","3ee83f58":"42802","993c5022":"42815","423540f2":"42852","42736d5f":"42947",de3bb7c3:"42970","1587ba1d":"43156","455aa1bd":"43251","995576e9":"43344","494501bd":"43426","336404dc":"43488","034e6169":"43730",d41234a3:"44177","70fe7dd7":"44178","00ab2cda":"44334",cd8e97a5:"44422",d81d1137:"44490","99e5ffa1":"44504","2d8a44d7":"44534",fe6131e8:"44539","104b9b56":"45282","7649ec47":"45370",bc85b804:"45503","7ad384dc":"45589","8799ef55":"45938",ccc49370:"46103","96cf5ff0":"46135","0fb98d33":"46338","98258c5e":"46559",b4898d44:"46613",c39e3602:"46807","79d79762":"46845",c5775233:"47454","01eb996b":"47552","408524d4":"48007","59ef8022":"48491","25fb872c":"48602","13246fd5":"48807",eed630e0:"49101",efbc69e1:"49196","3e7c5569":"49778","3a7a88af":"49814",fc598b5d:"50002",a81cf8b0:"50040","9c279ae7":"50196",fe2c893b:"50297","97592aac":"50653","069e3c4c":"50745","5c0b440a":"50783",ff428354:"51078","6b9bdd6f":"51105","6a9ad78d":"51317",b4a95996:"51478",e9aab890:"51732",d8d6ba90:"51842","644ff257":"51910","9688a5be":"51918",c4d09a44:"52406","345903d4":"52433","2e5f4263":"52447","814f3328":"52535",a3fd5a6d:"52730","9b020c4a":"52752",ed5c6c48:"52787","12b89d4a":"52804",bf002efe:"53017","943afd32":"53050","86ed1ff2":"53090",b88f1f9b:"53338",a8456d9f:"53572","9e4087bc":"53608",cb5f1a06:"54528",f537d1e9:"54890","76b137bf":"55276",ca0a1bad:"55305",bece3771:"55538","41ee0cff":"55702",ce7af6ae:"55864",cff2e56e:"56373","1293b2b0":"57074",d9837698:"57168","9d095dba":"57231",e2689dfc:"57258","39122aab":"57689",be95fd8f:"57887","15a59c33":"57924",d39aa6d3:"57930","4d31bfb3":"58092",d989c707:"58462","0349e4f0":"58524",b940e0b5:"58738","990f07de":"58755","3bed3e2f":"58846","7c2e1100":"58861",dfb6dfe9:"59076","4801d370":"59394","76bf568a":"59823","403e642d":"60055",f322d2da:"60445",f0649d54:"60880","3ef20ed6":"61407",b5375b6f:"61428",b75118f0:"61442","04fc65cc":"61755",a0d8a1a2:"61778",f38a0cbd:"61829","5cfa133c":"62079","2c98ca3e":"62106","04ee4ab4":"62217",ddacd194:"62465",c38c658c:"62565","4ef2f7cf":"62675",d95a70ea:"62721","52880d18":"62750","08019edc":"62773",bf8a5c8a:"63002","95d9e891":"63099","74091a62":"63196","56543a15":"63402",ed6d544d:"63448","3bbe0f34":"64172",c4f5d8e4:"64195",b7c6295c:"64233","0e3f998d":"64482",d355047f:"64713",fa2e2688:"64927","37ab9beb":"64992","44ff0d07":"65437",c3c516ff:"65752","900b935c":"65942","74659d33":"66167",e5fe80e3:"66419","0fa36e32":"66555","8462374b":"66808","296054a7":"66945",e863531f:"66981",ba9c0924:"67218","99f8fee5":"67287","31bb7274":"67394","888e2229":"67448",a608660b:"67597",faca745a:"67664",db170a23:"67706",ed316aed:"67870",a1824f78:"68070",e84755e1:"68283","104bf0a6":"68314","3201258e":"68526",c991f47b:"68549","18c9c2ba":"68585",fd748117:"68809","57e687e8":"69159","8181e18a":"69226","232d09b0":"69314","3fb29942":"69559",d1246f49:"69966","77c259dd":"70224","7aa7b8e0":"70344","6a07b55e":"70406","2461af4d":"70880","1a215693":"71535","179d57fa":"71580",a877f9e5:"72141","38b375cc":"72249","3d963cd5":"72286","56087ddb":"72293","1ef65056":"72326","13f17286":"72912","5225b7e0":"73249","8e32e44d":"73419","91e9cb67":"73443",af1e70b6:"73602","6a95f87e":"73796","792eae7e":"74275","4997ef4f":"74772","6f479459":"74963","7a1d9beb":"75088",cbc3190d:"75488",e7250a93:"75586",c23b5e26:"75658","3dafa002":"76258",d252041b:"76300","34f00221":"76454","9cb6fa7c":"76515","58486a98":"76572","62983deb":"76843","51878b77":"77225",b35e9998:"77390",d3093636:"77491","50f80512":"77682","39582d99":"77697","5887dc62":"77969","3b75a1c6":"78047","0b416bde":"78579","88e44e99":"78817","00353f68":"79043",efc1978d:"79079","0a954229":"79088",a0434473:"79328","34ff21b6":"79362","935f2afb":"80053","8fd0c721":"80112",b576c645:"80322",e1269457:"80598",ab9a8589:"80707","556ad472":"80751",ee6cea3b:"80829","17f418b7":"80837",b92ffdc6:"80874","2fb6c1b0":"80982",d25cde70:"81102",e1b2b5f3:"81340","00056db7":"81663",e4ba4487:"81787",fa889bee:"81878","1d820955":"81913","01a15f20":"81970",f7580424:"82158","12d85ab5":"82412",e03f5a54:"82569",f228fdc5:"82643","3a1147b5":"82668","62ccfbde":"82681","23af8e3e":"82723",f8597cb0:"82744",e1ed1e14:"82880","48cf7354":"82994",c34b1e36:"83251",fbb011bb:"83378","3e9a7422":"83393",d707d2ed:"83669","866b91a1":"83781","604b2d50":"83790",c2492d56:"84202",f14633e7:"84207","6094981e":"84298",f8aab3ca:"84880","59d9fecc":"84908",f7a32432:"84954",e8b9c2c7:"84979","8eb438b4":"85043",c2e40c34:"85122","321ba9c5":"85332",ffb7f88a:"85433",ee83122d:"85648","56d039a1":"85729","4e044b4e":"85847","4130db01":"86184","7bf16cc6":"86248","3c22aa66":"86482","67e06a8b":"86704",b80b493a:"86856",c40984d1:"87320",b35d004b:"87384","93ba15e0":"87452","66fcb786":"87597",b38eec89:"87684",b28fe732:"87808","5066efb2":"88003","70e8008e":"88838",d535b358:"88984",a2b5e5df:"89220","120352d6":"89262",deef0af8:"89407","83203dd1":"89450","6ff7775f":"89461",a19ce767:"89463",c413b43a:"89767","1dc2f362":"89864","69bb4b11":"90288","4585b70e":"90544","80472ecc":"91078","3ac37a06":"91103",e2ce57ed:"91210",cc52e3cd:"91214",d36f3429:"91387","2137a7cd":"91391","4de42975":"91570","85aed2f9":"91747",bc4bf151:"91755",c90c942b:"91824","8590feaa":"91871","78aebd5d":"91895","8328d740":"91921",dc7f7ac1:"92018","0e0ee9b2":"92045",b33598ac:"92059","325e22bd":"92321",d175df5e:"92805","8a46fce0":"92836","5dbf4619":"92867","3647ac9b":"92982",a6aa9e1f:"93089","318ccb43":"93377",d8b3ac6a:"94330","1f31a275":"94353","0e2bf69e":"94395",e464d112:"94508","53a5cb1e":"94548",d7f58694:"94553","94d743d6":"94579",d0cb74df:"94597",f7bfff73:"94603",e402e26d:"94623","5a3e1bca":"94702","1014102e":"94744","4cee39d7":"94756","82878c84":"94916","109f6864":"94994",f85db098:"95049",e9b75d67:"95064","283b8f99":"95521",bd1bcb2c:"95584","57ef17c5":"95602",d0c5e4fa:"95717",a34b09d4:"95768","8cb11643":"95974","1e14124f":"96350",e5c4ef01:"96673","82d7cc79":"96816","1a62d1af":"96960",c764f2c0:"97030",fe5d19cb:"97065","294c060e":"97165",e0077969:"97297","4db82253":"97381","2a023663":"97715","0e16e9ea":"97721","09990b79":"97800","1a4e3797":"97920","4e432063":"97988",d3c9b0d7:"98050",eb0ef952:"98197",bc6a9944:"98462","7296bed0":"98528",f5b4e517:"98546","6bdbf6b8":"98583","2406587d":"99119","7aa49360":"99147","69dc6d0a":"99161","58b8f176":"99374",efbb59fb:"99378",fc47fc53:"99601",dea122e2:"99781","093b81f4":"99970"}[e]||e,r.p+r.u(e)},(()=>{var e={51303:0,40532:0};r.f.j=(b,f)=>{var d=r.o(e,b)?e[b]:void 0;if(0!==d)if(d)f.push(d[2]);else if(/^(40532|51303)$/.test(b))e[b]=0;else{var a=new Promise(((f,a)=>d=e[b]=[f,a]));f.push(d[2]=a);var c=r.p+r.u(b),t=new Error;r.l(c,(f=>{if(r.o(e,b)&&(0!==(d=e[b])&&(e[b]=void 0),d)){var a=f&&("load"===f.type?"missing":f.type),c=f&&f.target&&f.target.src;t.message="Loading chunk "+b+" failed.\n("+a+": "+c+")",t.name="ChunkLoadError",t.type=a,t.request=c,d[1](t)}}),"chunk-"+b,b)}},r.O.j=b=>0===e[b];var b=(b,f)=>{var d,a,c=f[0],t=f[1],o=f[2],n=0;if(c.some((b=>0!==e[b]))){for(d in t)r.o(t,d)&&(r.m[d]=t[d]);if(o)var i=o(r)}for(b&&b(f);n