diff --git a/genai-example-app-only.ipynb b/genai-example-app-only.ipynb index c9e1803..19b29f5 100644 --- a/genai-example-app-only.ipynb +++ b/genai-example-app-only.ipynb @@ -8,10 +8,10 @@ }, "source": [ "# Neo4j Generative AI Workshop Example Application\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/guerinjeanmarc/genai-workshop/blob/main/genai-example-app-only.ipynb)\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neo4j-product-examples/genai-workshop/blob/main/genai-example-app-only.ipynb)\n", "\n", "__This notebook is a copy of `genai-workshop.ipynb` that contains only the final section: the example application for the LLM content generator. This notebook assumes you have already run `genai-workshop.ipynb`.__\n", - " \n", + "\n", "__Please note: There is no need to run this notebook for the workshop. It exists for demo purposes only.__" ] }, @@ -39,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 1, "metadata": { "id": "yY1XylsiZACB", "pycharm": { @@ -54,7 +54,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 2, "metadata": { "id": "7psF1otOdyXe" }, @@ -67,7 +67,7 @@ "from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n", "from langchain.vectorstores.neo4j_vector import Neo4jVector\n", "from langchain.graphs import Neo4jGraph\n", - "from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate\n", + "from langchain.prompts import PromptTemplate\n", "from langchain.schema import StrOutputParser\n", "from langchain.schema.runnable import RunnableLambda\n", "import gradio as gr\n", @@ -88,14 +88,14 @@ "\n", "There are two things you need here.\n", "1. Start a blank [Neo4j Sandbox](https://sandbox.neo4j.com/). Get your URI and password and plug them in below. 
Do not change the Neo4j username.\n", - "2. Get your OpenAI API key. You can use [this one](https://docs.google.com/document/d/19Lqjd0MqRs088KUVnd23ZrVU9G0OAg-53U72VrFwwms/edit) if you do not have one already\n", + "2. Get your OpenAI API key. You can use [this one](https://docs.google.com/document/d/19Lqjd0MqRs088KUVnd23ZrVU9G0OAg-53U72VrFwwms/edit) if you do not have one already.\n", "\n", "To make this easy, you can write the credentials and env variables directly into the below cell." ] }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 3, "metadata": { "id": "BQ9s0ZWhekd8" }, @@ -110,15 +110,16 @@ "AURA_DS = False\n", "\n", "# AI\n", - "LLM = 'gpt-4'\n", + "LLM = 'gpt-4o'\n", "\n", "# OpenAI - Required when using OpenAI models\n", - "os.environ['OPENAI_API_KEY'] = 'sk-...' #change this" + "os.environ['OPENAI_API_KEY'] = 'sk-...' #change this\n", + "OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')" ] }, { "cell_type": "code", - "execution_count": 69, + "execution_count": null, "metadata": { "id": "o-98NuINdyXe" }, @@ -138,7 +139,8 @@ " AURA_DS = eval(os.getenv('AURA_DS').title())\n", "\n", " # AI\n", - " LLM = os.getenv('LLM')" + " LLM = 'gpt-4o'\n", + " OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')" ] }, { @@ -158,9 +160,9 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 4, "metadata": { - "collapsed": false + "id": "P-06mvW-A59U" }, "outputs": [], "source": [ @@ -172,14 +174,14 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 5, "metadata": { "id": "JI9LVEdKekeH" }, "outputs": [], "source": [ "# Import relevant libraries\n", - "from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate\n", + "from langchain.prompts import PromptTemplate\n", "from langchain_openai import ChatOpenAI\n", "from langchain.schema import StrOutputParser\n", "\n", @@ -203,14 +205,12 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 12, 
"metadata": { "id": "WLBBVRXwdyXq" }, "outputs": [], "source": [ - "# We will use a mock URL for our sources in the metadata\n", - "\n", "def kg_personalized_search_gen(customer_id):\n", " return Neo4jVector.from_existing_index(\n", " embedding=embedding_model,\n", @@ -224,21 +224,20 @@ " OPTIONAL MATCH(product)<-[:VARIANT_OF]-(:Article)<-[:PURCHASED]-(:Customer)\n", " -[:PURCHASED]->(a:Article)<-[:PURCHASED]-(:Customer {{customerId: '{customer_id}'}})\n", " WITH count(a) AS purchaseScore, product, searchScore\n", - " RETURN product.text + '\\nurl: ' + 'https://representative-domain/product/' + product.productCode AS text,\n", + " RETURN product.text + '\\nurl: ' + product.url AS text,\n", " (1.0+purchaseScore)*searchScore AS score,\n", - " {{source: 'https://representative-domain/product/' + product.productCode}} AS metadata\n", + " {{source: product.url }} AS metadata\n", " ORDER BY purchaseScore DESC, searchScore DESC LIMIT 5\n", "\n", " \"\"\"\n", " )\n", "\n", - "# Use the same personalized recommendations as above but with a smaller limit\n", "kg = Neo4jGraph(url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD)\n", "def kg_recommendations_app(customer_id, k=30):\n", " res = kg.query(\"\"\"\n", " MATCH(:Customer {customerId:$customerId})-[:PURCHASED]->(:Article)\n", " -[r:CUSTOMERS_ALSO_LIKE]->(:Article)-[:VARIANT_OF]->(product)\n", - " RETURN product.text + '\\nurl: ' + 'https://representative-domain/product/' + product.productCode AS text,\n", + " RETURN product.text + '\\nurl: ' + product.url AS text,\n", " sum(r.score) AS recommenderScore\n", " ORDER BY recommenderScore DESC LIMIT $k\n", " \"\"\", params={'customerId': customer_id, 'k':k})\n", @@ -255,43 +254,44 @@ "source": [ "### Prompt Engineering\n", "\n", - "Now, let's define our prompts. We will combine two:\n", - "1. A system prompt which, in this case, tells the LLM how to generate the message\n", - "2. 
A human prompt that just wraps the customer search(es)/interest(s)\n", - "\n", - "This will allow us to pass the customer interest(s) to the retriever but then also to the LLM for additional context when drafting the message.\n" + "Now let's define our prompt. We will accept multiple parameters and provide detailed instructions to the LLM to condition the response based on retrieved data, customer interests, and time of year.\n" ] }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 7, "metadata": { "id": "aUAROR6aekeI" }, "outputs": [], "source": [ - "general_system_template = '''\n", - "You are a personal assistant named Sally for a fashion, home, and beauty company called HRM.\n", - "write an email to {customerName}, one of your customers, to promote and summarize products relevant for them given the current season / time of year: {timeOfYear} .\n", - "Please only mention the products listed below. Do not come up with or add any new products to the list.\n", - "Each product comes with an https `url` field. Make sure to provide that https url with descriptive name text in markdown for each product.\n", - "\n", - "---\n", - "# Relevant Products:\n", + "prompt = PromptTemplate.from_template('You are a personal assistant named Sally '\n", + "'for a fashion, home, and beauty company called HRM.'\n", + "'write an engaging email to {customerName}, one of your customers, '\n", + "'to promote and summarize products relevant for them given: '\n", + "'- The current season / time of year: {timeOfYear}'\n", + "'- Recent searches/interests: {customerInterests}'\n", + "'Please only mention the products listed below. '\n", + "'Do not come up with or add any new products to the list.'\n", + "'Each product comes with an https `url` field. 
'\n", + "'Make sure to provide that https url with descriptive name text '\n", + "'in markdown for each product.'\n", + "'''\n", + "\n", + "# RelevantProducts:\n", + "These are products from the HRM store the customer may be interested in based\n", + "on their recent searches/interests: {customerInterests}\n", "{searchProds}\n", "\n", "# Customer May Also Be Interested In the following\n", - " (pick items from here that pair with the above products well for the current season / time of year: {timeOfYear}.\n", - " prioritize those higher in the list if possible):\n", + "The below candidates are recommended based on the shared purchase patterns of\n", + "other customers in the HRM database.\n", + "Select the best 4 to 5 product subset from the context that best match the\n", + "time of year: {timeOfYear} and to pair with the RelevantProducts above.\n", + "For example, even if scarfs are listed here, they may not be appropriate for a\n", + "summer time of year so best not to include those.\n", "{recProds}\n", - "---\n", - "'''\n", - "general_user_template = \"{searchPrompt}\"\n", - "messages = [\n", - " SystemMessagePromptTemplate.from_template(general_system_template),\n", - " HumanMessagePromptTemplate.from_template(general_user_template),\n", - "]\n", - "prompt = ChatPromptTemplate.from_messages(messages)" + "''')" ] }, { @@ -303,7 +303,7 @@ "source": [ "### Create a Chain\n", "\n", - "Now let's put a chain together that will leverage the retrievers, prompts, and LLM model. This is where Langchain shines, putting RAG together in a simple way.\n", + "Now let's put a chain together that will leverage the retrievers, prompt, and LLM model. 
This is where Langchain shines, putting RAG together in a simple way.\n", "\n", "In addition to the personalized search and recommendations context, we will allow for some other parameters.\n", "\n", @@ -315,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 13, "metadata": { "id": "nUpih07QdyXr" }, @@ -329,11 +329,14 @@ "\n", "# LLM chain\n", "def chain_gen(customer_id):\n", - " return ({'searchProds': (lambda x:x['searchPrompt']) | kg_personalized_search_gen(customer_id).as_retriever(search_kwargs={\"k\": 100}) | format_docs,\n", - " 'recProds': (lambda x:customer_id) | RunnableLambda(kg_recommendations_app),\n", + " return ({'searchProds': (lambda x:x['customerInterests'])\n", + " | kg_personalized_search_gen(customer_id).as_retriever(search_kwargs={\"k\": 100})\n", + " | format_docs,\n", + " 'recProds': (lambda x:customer_id)\n", + " | RunnableLambda(kg_recommendations_app),\n", " 'customerName': lambda x:x['customerName'],\n", " 'timeOfYear': lambda x:x['timeOfYear'],\n", - " \"searchPrompt\": lambda x:x['searchPrompt']}\n", + " \"customerInterests\": lambda x:x['customerInterests']}\n", " | prompt\n", " | llm\n", " | StrOutputParser())" @@ -351,20 +354,20 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 14, "metadata": { - "collapsed": false + "id": "EEdx6aTSA59V" }, "outputs": [], "source": [ "# example inputs\n", "CUSTOMER_ID = \"daae10780ecd14990ea190a1e9917da33fe96cd8cfa5e80b67b4600171aa77e0\"\n", - "search_prompt = 'Oversized Sweaters'" + "search_prompt = 'denim jeans'" ] }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 15, "metadata": { "id": "lkBdqOVjekeI" }, @@ -375,9 +378,9 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, "metadata": { - "collapsed": false + "id": "CCX-ut4LA59V" }, "outputs": [], "source": [ @@ -396,71 +399,72 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 16, "metadata": { "colab": { 
"base_uri": "https://localhost:8080/" }, "id": "Z7-yDDUaD6FD", - "outputId": "686d5f56-2cb8-49bc-8e29-6eb47cb40762" + "outputId": "2266a8c8-6a46-4103-da3a-9807edb6a1f4" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "=== Prompt to send to LLM ===\n", - " System: \n", - "You are a personal assistant named Sally for a fashion, home, and beauty company called HRM.\n", - "write an email to Alex Smith, one of your customers, to promote and summarize products relevant for them given the current season / time of year: Feb, 2024 .\n", - "Please only mention the products listed below. Do not come up with or add any new products to the list.\n", - "Each product comes with an https `url` field. Make sure to provide that https url with descriptive name text in markdown for each product.\n", + " You are a personal assistant named Sally for a fashion, home, and beauty company called HRM.write an engaging email to Alex Smith, one of your customers, to promote and summarize products relevant for them given: - The current season / time of year: Feb, 2024- Recent searches/interests: denim jeansPlease only mention the products listed below. Do not come up with or add any new products to the list.Each product comes with an https `url` field. 
Make sure to provide that https url with descriptive name text in markdown for each product.\n", "\n", - "---\n", - "# Relevant Products:\n", + "# RelevantProducts:\n", + "These are products from the HRM store the customer may be interested in based\n", + "on their recent searches/interests: denim jeans\n", "##Product\n", - "Name: Queen Sweater\n", - "Type: Sweater\n", - "Group: Garment Upper body\n", - "Garment Type: Jersey Basic\n", - "Description: Top in lightweight sweatshirt fabric with ribbing around the neckline, cuffs and hem.\n", - "url: https://representative-domain/product/677930\n", + "Name: Rachel HW Denim TRS\n", + "Type: Trousers\n", + "Group: Garment Lower body\n", + "Garment Type: Trousers\n", + "Description: 5-pocket, ankle-length jeans in washed stretch denim in a relaxed fit with a high waist, zip fly and button and straight legs with cut-off, raw-edge hems.\n", + "url: https://representative-domain/product/670698\n", "\n", "##Product\n", - "Name: Jess oversize LS\n", - "Type: Top\n", - "Group: Garment Upper body\n", - "Garment Type: Jersey Basic\n", - "Description: Oversized top in soft jersey made from a cotton blend with dropped shoulders and long sleeves.\n", - "url: https://representative-domain/product/516712\n", + "Name: Jade HW Skinny Denim TRS\n", + "Type: Trousers\n", + "Group: Garment Lower body\n", + "Garment Type: Trousers\n", + "Description: High-waisted jeans in washed superstretch denim with a zip fly and button, fake front pockets, real back pockets and super-skinny legs.\n", + "url: https://representative-domain/product/706016\n", "\n", "##Product\n", - "Name: Petar Sweater(1)\n", - "Type: Sweater\n", - "Group: Garment Upper body\n", - "Garment Type: Jersey Basic\n", - "Description: Oversized top in sturdy sweatshirt fabric with dropped shoulders and ribbing around the neckline, cuffs and hem. 
Soft brushed inside.\n", - "url: https://representative-domain/product/557247\n", + "Name: Bono NW slim denim\n", + "Type: Trousers\n", + "Group: Garment Lower body\n", + "Garment Type: Trousers\n", + "Description: 5-pocket, ankle-length jeans in washed slightly stretch denim with a high waist, zip fly and button and tapered legs.\n", + "url: https://representative-domain/product/777038\n", "\n", "##Product\n", - "Name: Mother\n", - "Type: Sweater\n", - "Group: Garment Upper body\n", - "Garment Type: Knitwear\n", - "Description: Long-sleeved jumper in a soft knit containing some mohair with a ribbed turtle neck, dropped shoulders and slits in the sides. Slightly longer at the back.\n", - "url: https://representative-domain/product/675408\n", + "Name: Perrie Slim Mom Denim TRS\n", + "Type: Trousers\n", + "Group: Garment Lower body\n", + "Garment Type: Trousers\n", + "Description: 5-pocket, ankle-length jeans in washed, sturdy cotton denim with a high waist, button fly and slim, straight legs with raw-edge hems.\n", + "url: https://representative-domain/product/448509\n", "\n", "##Product\n", - "Name: Family Crew Ladies\n", - "Type: Sweater\n", - "Group: Garment Upper body\n", - "Garment Type: Special Offers\n", - "Description: Top in sweatshirt fabric with dropped shoulders, long sleeves and ribbing around the neckline, cuffs and hem. 
Soft brushed inside.\n", - "url: https://representative-domain/product/686265\n", + "Name: Jade Denim TRS\n", + "Type: Trousers\n", + "Group: Garment Lower body\n", + "Garment Type: Trousers\n", + "Description: High-waisted jeans in washed superstretch denim with a zip fly and button, fake front pockets, real back pockets and super-skinny legs.\n", + "url: https://representative-domain/product/539723\n", "\n", "# Customer May Also Be Interested In the following\n", - " (pick items from here that pair with the above products well for the current season / time of year: Feb, 2024.\n", - " prioritize those higher in the list if possible):\n", + "The below candidates are recommended based on the shared purchase patterns of\n", + "other customers in the HRM database.\n", + "Select the best 4 to 5 product subset from the context that best match the\n", + "time of year: Feb, 2024 and to pair with the RelevantProducts above.\n", + "For example, even if scarfs are listed here, they may not be appropriate for a\n", + "summer time of year so best not to include those.\n", "##Product\n", "Name: DONT USE ROLAND HOOD\n", "Type: Hoodie\n", @@ -486,14 +490,6 @@ "url: https://representative-domain/product/682848\n", "\n", "##Product\n", - "Name: Bubble Bum Bandeau (1)\n", - "Type: Bikini top\n", - "Group: Swimwear\n", - "Garment Type: Swimwear\n", - "Description: Fully lined bandeau bikini top with padded cups and removable inserts. Detachable ties at the back of the neck, ties at the back, side support and a silicone trim at the top.\n", - "url: https://representative-domain/product/642498\n", - "\n", - "##Product\n", "Name: Haven back detail\n", "Type: Bra\n", "Group: Underwear\n", @@ -502,6 +498,14 @@ "url: https://representative-domain/product/660519\n", "\n", "##Product\n", + "Name: Bubble Bum Bandeau (1)\n", + "Type: Bikini top\n", + "Group: Swimwear\n", + "Garment Type: Swimwear\n", + "Description: Fully lined bandeau bikini top with padded cups and removable inserts. 
Detachable ties at the back of the neck, ties at the back, side support and a silicone trim at the top.\n", + "url: https://representative-domain/product/642498\n", + "\n", + "##Product\n", "Name: Dixie tee\n", "Type: T-shirt\n", "Group: Garment Upper body\n", @@ -510,6 +514,14 @@ "url: https://representative-domain/product/598806\n", "\n", "##Product\n", + "Name: Rylee flatform\n", + "Type: Heeled sandals\n", + "Group: Shoes\n", + "Garment Type: Shoes\n", + "Description: Sandals with imitation suede straps, an elastic heel strap and wedge heels. Satin insoles and thermoplastic rubber (TPR) soles. Platform front 2 cm, heel 6 cm.\n", + "url: https://representative-domain/product/606711\n", + "\n", + "##Product\n", "Name: Eden SP Andes\n", "Type: Bra\n", "Group: Underwear\n", @@ -526,14 +538,6 @@ "url: https://representative-domain/product/244267\n", "\n", "##Product\n", - "Name: Rylee flatform\n", - "Type: Heeled sandals\n", - "Group: Shoes\n", - "Garment Type: Shoes\n", - "Description: Sandals with imitation suede straps, an elastic heel strap and wedge heels. Satin insoles and thermoplastic rubber (TPR) soles. Platform front 2 cm, heel 6 cm.\n", - "url: https://representative-domain/product/606711\n", - "\n", - "##Product\n", "Name: Leona Push Mirny\n", "Type: Bra\n", "Group: Underwear\n", @@ -542,14 +546,6 @@ "url: https://representative-domain/product/511924\n", "\n", "##Product\n", - "Name: Survivor\n", - "Type: Blouse\n", - "Group: Garment Upper body\n", - "Garment Type: Blouses\n", - "Description: Straight-cut blouse in a crêpe weave with a collar, concealed buttons down the front and fake flap front pockets. 
Yoke with a pleat at the back, long sleeves with pleats and buttoned cuffs, and a straight cut hem with slits in the sides.\n", - "url: https://representative-domain/product/662328\n", - "\n", - "##Product\n", "Name: Karin headband\n", "Type: Hairband\n", "Group: Accessories\n", @@ -558,12 +554,12 @@ "url: https://representative-domain/product/620425\n", "\n", "##Product\n", - "Name: Petar Sweater(1)\n", - "Type: Sweater\n", + "Name: Survivor\n", + "Type: Blouse\n", "Group: Garment Upper body\n", - "Garment Type: Jersey Basic\n", - "Description: Oversized top in sturdy sweatshirt fabric with dropped shoulders and ribbing around the neckline, cuffs and hem. Soft brushed inside.\n", - "url: https://representative-domain/product/557247\n", + "Garment Type: Blouses\n", + "Description: Straight-cut blouse in a crêpe weave with a collar, concealed buttons down the front and fake flap front pockets. Yoke with a pleat at the back, long sleeves with pleats and buttoned cuffs, and a straight cut hem with slits in the sides.\n", + "url: https://representative-domain/product/662328\n", "\n", "##Product\n", "Name: Rosemary\n", @@ -574,6 +570,14 @@ "url: https://representative-domain/product/753724\n", "\n", "##Product\n", + "Name: Petar Sweater(1)\n", + "Type: Sweater\n", + "Group: Garment Upper body\n", + "Garment Type: Jersey Basic\n", + "Description: Oversized top in sturdy sweatshirt fabric with dropped shoulders and ribbing around the neckline, cuffs and hem. 
Soft brushed inside.\n", + "url: https://representative-domain/product/557247\n", + "\n", + "##Product\n", "Name: Lead Superskinny\n", "Type: Trousers\n", "Group: Garment Lower body\n", @@ -606,28 +610,20 @@ "url: https://representative-domain/product/531615\n", "\n", "##Product\n", - "Name: Burcu Styling Scarf\n", - "Type: Scarf\n", - "Group: Accessories\n", - "Garment Type: Accessories\n", - "Description: Scarf in soft, patterned satin.\n", - "url: https://representative-domain/product/772565\n", - "\n", - "##Product\n", - "Name: Girlfriend R.W Trash\n", - "Type: Trousers\n", - "Group: Garment Lower body\n", - "Garment Type: Trousers Denim\n", - "Description: 5-pocket, ankle-length jeans in washed denim with hard-worn details in a slightly looser fit. Regular waist, zip fly and button, slightly lower crotch and tapered legs with raw-edge hems.\n", - "url: https://representative-domain/product/724904\n", + "Name: Gwen Jersey Top\n", + "Type: Vest top\n", + "Group: Garment Upper body\n", + "Garment Type: Dresses Ladies\n", + "Description: Fitted top in stretch jersey with a slight sheen. V-neck with a lace trim at the top and adjustable spaghetti straps.\n", + "url: https://representative-domain/product/671852\n", "\n", "##Product\n", - "Name: Veronica dress\n", + "Name: FF Kate dress PI\n", "Type: Dress\n", "Group: Garment Full body\n", - "Garment Type: Dresses Ladies\n", - "Description: Short dress in a crêpe weave with a V-neck and wrapover front with concealed press-studs. Short sleeves and a seam at the waist with a sewn-in tie belt.\n", - "url: https://representative-domain/product/710729\n", + "Garment Type: Special Offers\n", + "Description: Short dress in patterned stretch jersey with a round neckline, gathered seam at the waist and long raglan sleeves with gathered seams at the front. 
The polyester content of the dress is recycled.\n", + "url: https://representative-domain/product/796240\n", "\n", "##Product\n", "Name: Brad LW BF Denim TRS\n", @@ -638,36 +634,44 @@ "url: https://representative-domain/product/615970\n", "\n", "##Product\n", - "Name: Derek\n", - "Type: Dress\n", - "Group: Garment Full body\n", - "Garment Type: Dresses Ladies\n", - "Description: Calf-length dress in woven fabric with a collar, long sleeves and wide cuffs with a slit. Narrow elasticated seam at the waist, a pleated skirt and laser-cut hem. Unlined.\n", - "url: https://representative-domain/product/706366\n", + "Name: Baby shark top\n", + "Type: Bikini top\n", + "Group: Swimwear\n", + "Garment Type: Swimwear\n", + "Description: Lined, non-wired bikini top with flounces. Adjustable shoulder straps, cups with removable inserts that shape the bust and provide good support, and a metal fastener at the back.\n", + "url: https://representative-domain/product/861410\n", "\n", "##Product\n", - "Name: Gwen Jersey Top\n", - "Type: Vest top\n", - "Group: Garment Upper body\n", - "Garment Type: Dresses Ladies\n", - "Description: Fitted top in stretch jersey with a slight sheen. V-neck with a lace trim at the top and adjustable spaghetti straps.\n", - "url: https://representative-domain/product/671852\n", + "Name: Belle PU skirt\n", + "Type: Skirt\n", + "Group: Garment Lower body\n", + "Garment Type: Skirts\n", + "Description: Flared, calf-length skirt in imitation leather. High waist with press-studs and a concealed zip at one side, and visible seams front and back. Unlined.\n", + "url: https://representative-domain/product/856232\n", "\n", "##Product\n", - "Name: W COLOSSEO BLOUSE EQ\n", - "Type: Blouse\n", - "Group: Garment Upper body\n", - "Garment Type: Unknown\n", - "Description: Blouse in a cotton weave with a small, frilled collar, concealed buttons down the front, short puff sleeves and a gently rounded hem with slits in the sides. 
Slightly longer at the back.\n", - "url: https://representative-domain/product/665648\n", + "Name: Burcu Styling Scarf\n", + "Type: Scarf\n", + "Group: Accessories\n", + "Garment Type: Accessories\n", + "Description: Scarf in soft, patterned satin.\n", + "url: https://representative-domain/product/772565\n", "\n", "##Product\n", - "Name: Latte RW slacks\n", + "Name: Girlfriend R.W Trash\n", "Type: Trousers\n", "Group: Garment Lower body\n", - "Garment Type: Trousers\n", - "Description: Ankle-length cigarette trousers in a stretch weave with a regular waist, hook-and-eye fastening and zip fly. Side pockets, fake welt back pockets and tapered legs.\n", - "url: https://representative-domain/product/777504\n", + "Garment Type: Trousers Denim\n", + "Description: 5-pocket, ankle-length jeans in washed denim with hard-worn details in a slightly looser fit. Regular waist, zip fly and button, slightly lower crotch and tapered legs with raw-edge hems.\n", + "url: https://representative-domain/product/724904\n", + "\n", + "##Product\n", + "Name: Derek\n", + "Type: Dress\n", + "Group: Garment Full body\n", + "Garment Type: Dresses Ladies\n", + "Description: Calf-length dress in woven fabric with a collar, long sleeves and wide cuffs with a slit. Narrow elasticated seam at the waist, a pleated skirt and laser-cut hem. 
Unlined.\n", + "url: https://representative-domain/product/706366\n", "\n", "##Product\n", "Name: BISCUIT\n", @@ -700,9 +704,7 @@ "Garment Type: Jersey Fancy\n", "Description: Polo shirt in cotton piqué with a ribbed collar, button placket, short sleeves with ribbed trims, and slits in the sides.\n", "url: https://representative-domain/product/816759\n", - "---\n", "\n", - "Human: Oversized Sweaters\n", " === End Prompt ===\n", " \n" ] @@ -716,17 +718,17 @@ " '''\n", "\n", "def chain_print_prompt(customer_id):\n", - " return ({'searchProds': (lambda x:x['searchPrompt']) | kg_personalized_search_gen(customer_id).as_retriever(search_kwargs={\"k\": 100}) | format_docs,\n", + " return ({'searchProds': (lambda x:x['customerInterests']) | kg_personalized_search_gen(customer_id).as_retriever(search_kwargs={\"k\": 100}) | format_docs,\n", " 'recProds': (lambda x:customer_id) | RunnableLambda(kg_recommendations_app),\n", " 'customerName': lambda x:x['customerName'],\n", " 'timeOfYear': lambda x:x['timeOfYear'],\n", - " \"searchPrompt\": lambda x:x['searchPrompt']}\n", + " \"customerInterests\": lambda x:x['customerInterests']}\n", " | prompt\n", " | format_final_prompt\n", " | StrOutputParser())\n", "\n", "print( chain_print_prompt(CUSTOMER_ID)\\\n", - " .invoke({'searchPrompt':search_prompt, 'customerName':'Alex Smith', 'timeOfYear':'Feb, 2024'}))" + " .invoke({'customerInterests':search_prompt, 'customerName':'Alex Smith', 'timeOfYear':'Feb, 2024'}))" ] }, { @@ -741,13 +743,13 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": null, "metadata": { "id": "qeOts3Q4ZACL" }, "outputs": [], "source": [ - "#print(chain.invoke({'searchPrompt':\"western boots\", 'customerName':'Alex Smith', 'timeOfYear':'Feb, 2024'}))" + "#print(chain.invoke({'customerInterests':\"western boots\", 'customerName':'Alex Smith', 'timeOfYear':'July, 2024'}))" ] }, { @@ -763,7 +765,7 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 17, "metadata": { 
"id": "A1F0ve3cekeI" }, @@ -782,93 +784,92 @@ "examples = [\n", " [\n", " CUSTOMER_ID,\n", - " 'March, 2024',\n", + " 'June, 2024',\n", " 'Alex Smith',\n", - " 'Oversized Sweaters'\n", + " 'denim jeans'\n", + " ],\n", + " [\n", + " CUSTOMER_ID,\n", + " 'July, 2024',\n", + " 'Alex Smith',\n", + " 'western boots'\n", " ],\n", " [\n", " '819f4eab1fd76b932fd403ae9f427de8eb9c5b64411d763bb26b5c8c3c30f16f',\n", - " 'March, 2024',\n", + " 'June, 2024',\n", " 'Robin Fischer',\n", - " 'Oversized Sweaters'\n", + " 'denim jeans'\n", " ],\n", " [\n", " '44b0898ecce6cc1268dfdb0f91e053db014b973f67e34ed8ae28211410910693',\n", - " 'March, 2024',\n", + " 'Feb, 2024',\n", " 'Chris Johnson',\n", " 'Oversized Sweaters'\n", " ],\n", " [\n", " '819f4eab1fd76b932fd403ae9f427de8eb9c5b64411d763bb26b5c8c3c30f16f',\n", - " 'March, 2024',\n", + " 'Feb, 2024',\n", " 'Robin Fischer',\n", " 'denim jeans'\n", " ],\n", + " [\n", + " CUSTOMER_ID,\n", + " 'Feb, 2024',\n", + " 'Alex Smith',\n", + " 'oversized sweaters'\n", + " ],\n", "]" ] }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 626 }, "id": "XsBcFQLlekeI", - "outputId": "974df8d3-1e34-44e2-e355-3c452f3844c6" + "outputId": "ade42c47-69ba-466c-93a6-c7701448db27" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Running on local URL: http://127.0.0.1:7861\n", + "Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n", + "Running on public URL: https://b3e57e0af4b2512826.gradio.live\n", "\n", - "Could not create share link. Missing file: /Users/zachblumenfeld/opt/anaconda3/envs/genai-workshop/lib/python3.10/site-packages/gradio/frpc_darwin_arm64_v0.2. \n", - "\n", - "Please check your internet connection. This can happen if your antivirus software blocks the download of this file. 
You can install manually by following these steps: \n", - "\n", - "1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_darwin_arm64\n", - "2. Rename the downloaded file to: frpc_darwin_arm64_v0.2\n", - "3. Move the file to this location: /Users/zachblumenfeld/opt/anaconda3/envs/genai-workshop/lib/python3.10/site-packages/gradio\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" + "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { + "output_type": "display_data", "data": { - "text/html": [ - "
" - ], "text/plain": [ "" + ], + "text/html": [ + "
" ] }, - "metadata": {}, - "output_type": "display_data" + "metadata": {} }, { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Keyboard interruption in main thread... closing server.\n", - "Killing tunnel 127.0.0.1:7860 <> None\n", - "Killing tunnel 127.0.0.1:7861 <> None\n" + "Killing tunnel 127.0.0.1:7860 <> https://b3e57e0af4b2512826.gradio.live\n" ] }, { + "output_type": "execute_result", "data": { "text/plain": [] }, - "execution_count": 85, "metadata": {}, - "output_type": "execute_result" + "execution_count": 18 } ], "source": [ @@ -876,11 +877,11 @@ "\n", "def message_generator(*x):\n", " chain = get_chain(x[0])\n", - " return chain.invoke({'searchPrompt':x[3], 'customerName':x[2], 'timeOfYear': x[1]})\n", + " return chain.invoke({'customerInterests':x[3], 'customerName':x[2], 'timeOfYear': x[1]})\n", "\n", "customer_id = gr.Textbox(value=CUSTOMER_ID, label=\"Customer ID\")\n", - "time_of_year = gr.Textbox(value=\"March, 2024\", label=\"Time Of Year\")\n", - "search_prompt_txt = gr.Textbox(value='Oversized Sweaters', label=\"Customer Interests(s)\")\n", + "time_of_year = gr.Textbox(value=\"June, 2024\", label=\"Time Of Year\")\n", + "search_prompt_txt = gr.Textbox(value='denim jeans', label=\"Customer Interests(s)\")\n", "customer_name = gr.Textbox(value='Alex Smith', label=\"Customer Name\")\n", "message_result = gr.Markdown( label=\"Message\")\n", "\n", @@ -896,7 +897,7 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false + "id": "z_qwUiTZA59W" }, "outputs": [], "source": [] diff --git a/genai-workshop.ipynb b/genai-workshop.ipynb index b0efe51..0e63926 100644 --- a/genai-workshop.ipynb +++ b/genai-workshop.ipynb @@ -8,11 +8,13 @@ }, "source": [ "# Neo4j Generative AI Workshop\n", - "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/guerinjeanmarc/genai-workshop/blob/main/genai-workshop.ipynb)\n", + "[![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/neo4j-product-examples/genai-workshop/blob/main/genai-workshop.ipynb)\n", "\n", - "In this workshop, you will learn how to use Neo4j Knowledge Graphs to make Large Language Models (LLMs) useful for more real-world use cases.\n", + "This workshop will teach you how to use Neo4j for Graph-Powered Retrieval-Augmented Generation (GraphRAG) to enhance GenAI and improve response quality for real-world applications. \n", "\n", - "We walk through an example that uses real-world customer and product data from a fashion, style, and beauty retailer. We show how you can use a knowledge graph to ground an LLM, enabling it to build tailored marketing content personalized to each customer based on their interests and shared purchase histories. We use a pattern called Retrieval-Augmented Generation (RAG) to accomplish this. Specifically, one that leverages not only vector search but also graph pattern matching and graph machine learning to provide more relevant personalized results to customers.\n", + "GenAI, despite its potential, faces challenges like hallucination and lack of domain knowledge. GraphRAG addresses these issues by combining vector search with knowledge graphs and data science techniques. This integration helps improve context, semantic understanding, and personalization, making Large Language Models (LLMs) more effective for critical applications. \n", + "\n", + "We walk through an example that uses real-world customer and product data from a fashion, style, and beauty retailer. We show how you can use a knowledge graph to ground an LLM, enabling it to build tailored marketing content personalized to each customer based on their interests and shared purchase histories. 
We use Retrieval-Augmented Generation (RAG) to accomplish this, specifically leveraging not just vector search but also graph pattern matching and graph machine learning to provide more relevant personalized results to customers. We call this graph-powered RAG approach “GraphRAG” for short.\n", "\n", "This notebook walks through the end-to-end process, including:\n", "- Building the knowledge graph\n", @@ -39,419 +41,289 @@ "id": "8yxD7Ah0ZACB" }, "source": [ - "### Some Logics\n", + "### Some Logistics\n", "1. Make a copy of this notebook in Colab by [clicking here](https://colab.research.google.com/github/neo4j-product-examples/genai-workshop/blob/main/genai-workshop.ipynb).\n", "2. Run the pip install below to get the necessary dependencies. this can take a while. Then run the following cell to import relevant libraries\n" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": { "id": "yY1XylsiZACB", "pycharm": { "name": "#%%capture\n" - } + }, + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "22620a43-1595-41da-b7fd-5260c4bae833" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Collecting sentence_transformers\r\n", - " Downloading sentence_transformers-2.3.1-py3-none-any.whl.metadata (11 kB)\r\n", - "Collecting langchain\r\n", - " Downloading langchain-0.1.8-py3-none-any.whl.metadata (13 kB)\r\n", - "Collecting langchain-openai\r\n", - " Using cached langchain_openai-0.0.6-py3-none-any.whl.metadata (2.5 kB)\r\n", - "Collecting openai\r\n", - " Using cached openai-1.12.0-py3-none-any.whl.metadata (18 kB)\r\n", - "Collecting tiktoken\r\n", - " Using cached tiktoken-0.6.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.6 kB)\r\n", - "Collecting python-dotenv\r\n", - " Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\r\n", - "Collecting gradio\r\n", - " Downloading gradio-4.19.1-py3-none-any.whl.metadata (15 kB)\r\n", - "Collecting 
graphdatascience\r\n", - " Downloading graphdatascience-1.9-py3-none-any.whl.metadata (7.7 kB)\r\n", - "Collecting altair\r\n", - " Downloading altair-5.2.0-py3-none-any.whl.metadata (8.7 kB)\r\n", - "Collecting neo4j_tools\r\n", - " Downloading neo4j_tools-0.5.1-py3-none-any.whl.metadata (1.1 kB)\r\n", - "Collecting transformers<5.0.0,>=4.32.0 (from sentence_transformers)\r\n", - " Downloading transformers-4.37.2-py3-none-any.whl.metadata (129 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m129.4/129.4 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\r\n", - "\u001b[?25hCollecting tqdm (from sentence_transformers)\r\n", - " Downloading tqdm-4.66.2-py3-none-any.whl.metadata (57 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.6/57.6 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hCollecting torch>=1.11.0 (from sentence_transformers)\r\n", - " Downloading torch-2.2.0-cp310-none-macosx_11_0_arm64.whl.metadata (25 kB)\r\n", - "Collecting numpy (from sentence_transformers)\r\n", - " Downloading numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl.metadata (61 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.1/61.1 kB\u001b[0m \u001b[31m4.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hCollecting scikit-learn (from sentence_transformers)\r\n", - " Downloading scikit_learn-1.4.1.post1-cp310-cp310-macosx_12_0_arm64.whl.metadata (11 kB)\r\n", - "Collecting scipy (from sentence_transformers)\r\n", - " Downloading scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl.metadata (112 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m112.9/112.9 kB\u001b[0m \u001b[31m9.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hCollecting nltk (from sentence_transformers)\r\n", - " Downloading 
nltk-3.8.1-py3-none-any.whl.metadata (2.8 kB)\r\n", - "Collecting sentencepiece (from sentence_transformers)\r\n", - " Downloading sentencepiece-0.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (7.7 kB)\r\n", - "Collecting huggingface-hub>=0.15.1 (from sentence_transformers)\r\n", - " Downloading huggingface_hub-0.20.3-py3-none-any.whl.metadata (12 kB)\r\n", - "Collecting Pillow (from sentence_transformers)\r\n", - " Downloading pillow-10.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (9.7 kB)\r\n", - "Requirement already satisfied: PyYAML>=5.3 in ./env/lib/python3.10/site-packages (from langchain) (6.0.1)\r\n", - "Collecting SQLAlchemy<3,>=1.4 (from langchain)\r\n", - " Downloading SQLAlchemy-2.0.27-cp310-cp310-macosx_11_0_arm64.whl.metadata (9.6 kB)\r\n", - "Collecting aiohttp<4.0.0,>=3.8.3 (from langchain)\r\n", - " Downloading aiohttp-3.9.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (7.4 kB)\r\n", - "Collecting async-timeout<5.0.0,>=4.0.0 (from langchain)\r\n", - " Using cached async_timeout-4.0.3-py3-none-any.whl.metadata (4.2 kB)\r\n", - "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)\r\n", - " Downloading dataclasses_json-0.6.4-py3-none-any.whl.metadata (25 kB)\r\n", - "Collecting jsonpatch<2.0,>=1.33 (from langchain)\r\n", - " Using cached jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)\r\n", - "Collecting langchain-community<0.1,>=0.0.21 (from langchain)\r\n", - " Downloading langchain_community-0.0.21-py3-none-any.whl.metadata (8.1 kB)\r\n", - "Collecting langchain-core<0.2,>=0.1.24 (from langchain)\r\n", - " Downloading langchain_core-0.1.25-py3-none-any.whl.metadata (6.0 kB)\r\n", - "Collecting langsmith<0.2.0,>=0.1.0 (from langchain)\r\n", - " Downloading langsmith-0.1.5-py3-none-any.whl.metadata (13 kB)\r\n", - "Collecting pydantic<3,>=1 (from langchain)\r\n", - " Downloading pydantic-2.6.1-py3-none-any.whl.metadata (83 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.5/83.5 kB\u001b[0m 
\u001b[31m4.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hRequirement already satisfied: requests<3,>=2 in ./env/lib/python3.10/site-packages (from langchain) (2.31.0)\r\n", - "Collecting tenacity<9.0.0,>=8.1.0 (from langchain)\r\n", - " Using cached tenacity-8.2.3-py3-none-any.whl.metadata (1.0 kB)\r\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in ./env/lib/python3.10/site-packages (from openai) (4.2.0)\r\n", - "Collecting distro<2,>=1.7.0 (from openai)\r\n", - " Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\r\n", - "Collecting httpx<1,>=0.23.0 (from openai)\r\n", - " Downloading httpx-0.26.0-py3-none-any.whl.metadata (7.6 kB)\r\n", - "Requirement already satisfied: sniffio in ./env/lib/python3.10/site-packages (from openai) (1.3.0)\r\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in ./env/lib/python3.10/site-packages (from openai) (4.9.0)\r\n", - "Collecting regex>=2022.1.18 (from tiktoken)\r\n", - " Downloading regex-2023.12.25-cp310-cp310-macosx_11_0_arm64.whl.metadata (40 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m40.9/40.9 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)\r\n", - " Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)\r\n", - "Collecting fastapi (from gradio)\r\n", - " Downloading fastapi-0.109.2-py3-none-any.whl.metadata (25 kB)\r\n", - "Collecting ffmpy (from gradio)\r\n", - " Downloading ffmpy-0.3.2.tar.gz (5.5 kB)\r\n", - " Preparing metadata (setup.py) ... 
\u001b[?25ldone\r\n", - "\u001b[?25hCollecting gradio-client==0.10.0 (from gradio)\r\n", - " Downloading gradio_client-0.10.0-py3-none-any.whl.metadata (7.1 kB)\r\n", - "Collecting importlib-resources<7.0,>=1.3 (from gradio)\r\n", - " Using cached importlib_resources-6.1.1-py3-none-any.whl.metadata (4.1 kB)\r\n", - "Requirement already satisfied: jinja2<4.0 in ./env/lib/python3.10/site-packages (from gradio) (3.1.3)\r\n", - "Requirement already satisfied: markupsafe~=2.0 in ./env/lib/python3.10/site-packages (from gradio) (2.1.3)\r\n", - "Collecting matplotlib~=3.0 (from gradio)\r\n", - " Downloading matplotlib-3.8.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.8 kB)\r\n", - "Collecting orjson~=3.0 (from gradio)\r\n", - " Downloading orjson-3.9.14-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl.metadata (49 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m3.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hRequirement already satisfied: packaging in ./env/lib/python3.10/site-packages (from gradio) (23.1)\r\n", - "Collecting pandas<3.0,>=1.0 (from gradio)\r\n", - " Downloading pandas-2.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (19 kB)\r\n", - "Collecting pydub (from gradio)\r\n", - " Using cached pydub-0.25.1-py2.py3-none-any.whl (32 kB)\r\n", - "Collecting python-multipart>=0.0.9 (from gradio)\r\n", - " Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)\r\n", - "Collecting ruff>=0.1.7 (from gradio)\r\n", - " Downloading ruff-0.2.2-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl.metadata (23 kB)\r\n", - "Collecting semantic-version~=2.0 (from gradio)\r\n", - " Using cached semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\r\n", - "Collecting tomlkit==0.12.0 (from gradio)\r\n", - " Using cached tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)\r\n", - "Collecting typer<1.0,>=0.9 (from 
typer[all]<1.0,>=0.9->gradio)\r\n", - " Downloading typer-0.9.0-py3-none-any.whl.metadata (14 kB)\r\n", - "Collecting uvicorn>=0.14.0 (from gradio)\r\n", - " Downloading uvicorn-0.27.1-py3-none-any.whl.metadata (6.3 kB)\r\n", - "Collecting fsspec (from gradio-client==0.10.0->gradio)\r\n", - " Downloading fsspec-2024.2.0-py3-none-any.whl.metadata (6.8 kB)\r\n", - "Collecting websockets<12.0,>=10.0 (from gradio-client==0.10.0->gradio)\r\n", - " Downloading websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.6 kB)\r\n", - "Collecting multimethod<2.0,>=1.0 (from graphdatascience)\r\n", - " Downloading multimethod-1.11.1-py3-none-any.whl.metadata (8.8 kB)\r\n", - "Collecting neo4j<6.0,>=4.4.2 (from graphdatascience)\r\n", - " Downloading neo4j-5.17.0.tar.gz (197 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m197.8/197.8 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25h Installing build dependencies ... \u001b[?25ldone\r\n", - "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\r\n", - "\u001b[?25h Installing backend dependencies ... \u001b[?25ldone\r\n", - "\u001b[?25h Preparing metadata (pyproject.toml) ... 
\u001b[?25ldone\r\n", - "\u001b[?25hCollecting pyarrow<15.0,>=10.0 (from graphdatascience)\r\n", - " Downloading pyarrow-14.0.2-cp310-cp310-macosx_11_0_arm64.whl.metadata (3.0 kB)\r\n", - "Collecting textdistance<5.0,>=4.0 (from graphdatascience)\r\n", - " Downloading textdistance-4.6.1-py3-none-any.whl.metadata (18 kB)\r\n", - "Requirement already satisfied: jsonschema>=3.0 in ./env/lib/python3.10/site-packages (from altair) (4.19.2)\r\n", - "Collecting toolz (from altair)\r\n", - " Downloading toolz-0.12.1-py3-none-any.whl.metadata (5.1 kB)\r\n", - "Collecting aiosignal>=1.1.2 (from aiohttp<4.0.0,>=3.8.3->langchain)\r\n", - " Downloading aiosignal-1.3.1-py3-none-any.whl.metadata (4.0 kB)\r\n", - "Requirement already satisfied: attrs>=17.3.0 in ./env/lib/python3.10/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.1.0)\r\n", - "Collecting frozenlist>=1.1.1 (from aiohttp<4.0.0,>=3.8.3->langchain)\r\n", - " Downloading frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl.metadata (12 kB)\r\n", - "Collecting multidict<7.0,>=4.5 (from aiohttp<4.0.0,>=3.8.3->langchain)\r\n", - " Downloading multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl.metadata (4.2 kB)\r\n", - "Collecting yarl<2.0,>=1.0 (from aiohttp<4.0.0,>=3.8.3->langchain)\r\n", - " Downloading yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl.metadata (31 kB)\r\n", - "Requirement already satisfied: idna>=2.8 in ./env/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (3.4)\r\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in ./env/lib/python3.10/site-packages (from anyio<5,>=3.5.0->openai) (1.2.0)\r\n", - "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\r\n", - " Downloading marshmallow-3.20.2-py3-none-any.whl.metadata (7.5 kB)\r\n", - "Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain)\r\n", - " Using cached typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\r\n", - "Requirement already satisfied: certifi in 
./env/lib/python3.10/site-packages (from httpx<1,>=0.23.0->openai) (2024.2.2)\r\n", - "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)\r\n", - " Downloading httpcore-1.0.3-py3-none-any.whl.metadata (20 kB)\r\n", - "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)\r\n", - " Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\r\n", - "Collecting filelock (from huggingface-hub>=0.15.1->sentence_transformers)\r\n", - " Downloading filelock-3.13.1-py3-none-any.whl.metadata (2.8 kB)\r\n", - "Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain)\r\n", - " Using cached jsonpointer-2.4-py2.py3-none-any.whl.metadata (2.5 kB)\r\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in ./env/lib/python3.10/site-packages (from jsonschema>=3.0->altair) (2023.7.1)\r\n", - "Requirement already satisfied: referencing>=0.28.4 in ./env/lib/python3.10/site-packages (from jsonschema>=3.0->altair) (0.30.2)\r\n", - "Requirement already satisfied: rpds-py>=0.7.1 in ./env/lib/python3.10/site-packages (from jsonschema>=3.0->altair) (0.10.6)\r\n", - "Collecting packaging (from gradio)\r\n", - " Using cached packaging-23.2-py3-none-any.whl.metadata (3.2 kB)\r\n", - "Collecting contourpy>=1.0.1 (from matplotlib~=3.0->gradio)\r\n", - " Using cached contourpy-1.2.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (5.8 kB)\r\n", - "Collecting cycler>=0.10 (from matplotlib~=3.0->gradio)\r\n", - " Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)\r\n", - "Collecting fonttools>=4.22.0 (from matplotlib~=3.0->gradio)\r\n", - " Downloading fonttools-4.49.0-cp310-cp310-macosx_10_9_universal2.whl.metadata (159 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m159.1/159.1 kB\u001b[0m \u001b[31m7.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hCollecting kiwisolver>=1.3.1 (from matplotlib~=3.0->gradio)\r\n", - " Using cached 
kiwisolver-1.4.5-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.4 kB)\r\n", - "Collecting pyparsing>=2.3.1 (from matplotlib~=3.0->gradio)\r\n", - " Using cached pyparsing-3.1.1-py3-none-any.whl.metadata (5.1 kB)\r\n", - "Requirement already satisfied: python-dateutil>=2.7 in ./env/lib/python3.10/site-packages (from matplotlib~=3.0->gradio) (2.8.2)\r\n", - "Requirement already satisfied: pytz in ./env/lib/python3.10/site-packages (from neo4j<6.0,>=4.4.2->graphdatascience) (2023.3.post1)\r\n", - "Collecting tzdata>=2022.7 (from pandas<3.0,>=1.0->gradio)\r\n", - " Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)\r\n", - "Collecting annotated-types>=0.4.0 (from pydantic<3,>=1->langchain)\r\n", - " Using cached annotated_types-0.6.0-py3-none-any.whl.metadata (12 kB)\r\n", - "Collecting pydantic-core==2.16.2 (from pydantic<3,>=1->langchain)\r\n", - " Downloading pydantic_core-2.16.2-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.5 kB)\r\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in ./env/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2.0.4)\r\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in ./env/lib/python3.10/site-packages (from requests<3,>=2->langchain) (2.1.0)\r\n", - "Collecting sympy (from torch>=1.11.0->sentence_transformers)\r\n", - " Downloading sympy-1.12-py3-none-any.whl.metadata (12 kB)\r\n", - "Collecting networkx (from torch>=1.11.0->sentence_transformers)\r\n", - " Downloading networkx-3.2.1-py3-none-any.whl.metadata (5.2 kB)\r\n", - "Collecting tokenizers<0.19,>=0.14 (from transformers<5.0.0,>=4.32.0->sentence_transformers)\r\n", - " Downloading tokenizers-0.15.2-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.7 kB)\r\n", - "Collecting safetensors>=0.4.1 (from transformers<5.0.0,>=4.32.0->sentence_transformers)\r\n", - " Downloading safetensors-0.4.2-cp310-cp310-macosx_11_0_arm64.whl.metadata (3.8 kB)\r\n", - "Collecting click<9.0.0,>=7.1.1 (from 
typer<1.0,>=0.9->typer[all]<1.0,>=0.9->gradio)\r\n", - " Using cached click-8.1.7-py3-none-any.whl.metadata (3.0 kB)\r\n", - "Collecting colorama<0.5.0,>=0.4.3 (from typer[all]<1.0,>=0.9->gradio)\r\n", - " Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)\r\n", - "Collecting shellingham<2.0.0,>=1.3.0 (from typer[all]<1.0,>=0.9->gradio)\r\n", - " Using cached shellingham-1.5.4-py2.py3-none-any.whl.metadata (3.5 kB)\r\n", - "Collecting rich<14.0.0,>=10.11.0 (from typer[all]<1.0,>=0.9->gradio)\r\n", - " Using cached rich-13.7.0-py3-none-any.whl.metadata (18 kB)\r\n", - "Collecting starlette<0.37.0,>=0.36.3 (from fastapi->gradio)\r\n", - " Downloading starlette-0.36.3-py3-none-any.whl.metadata (5.9 kB)\r\n", - "Collecting joblib (from nltk->sentence_transformers)\r\n", - " Using cached joblib-1.3.2-py3-none-any.whl.metadata (5.4 kB)\r\n", - "Collecting threadpoolctl>=2.0.0 (from scikit-learn->sentence_transformers)\r\n", - " Downloading threadpoolctl-3.3.0-py3-none-any.whl.metadata (13 kB)\r\n", - "Requirement already satisfied: six>=1.5 in ./env/lib/python3.10/site-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\r\n", - "Collecting markdown-it-py>=2.2.0 (from rich<14.0.0,>=10.11.0->typer[all]<1.0,>=0.9->gradio)\r\n", - " Using cached markdown_it_py-3.0.0-py3-none-any.whl.metadata (6.9 kB)\r\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in ./env/lib/python3.10/site-packages (from rich<14.0.0,>=10.11.0->typer[all]<1.0,>=0.9->gradio) (2.15.1)\r\n", - "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain)\r\n", - " Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)\r\n", - "Collecting mpmath>=0.19 (from sympy->torch>=1.11.0->sentence_transformers)\r\n", - " Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)\r\n", - "Collecting mdurl~=0.1 (from markdown-it-py>=2.2.0->rich<14.0.0,>=10.11.0->typer[all]<1.0,>=0.9->gradio)\r\n", - " Using cached 
mdurl-0.1.2-py3-none-any.whl (10.0 kB)\r\n", - "Downloading sentence_transformers-2.3.1-py3-none-any.whl (132 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.8/132.8 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading langchain-0.1.8-py3-none-any.whl (816 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m816.1/816.1 kB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m:00:01\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hUsing cached langchain_openai-0.0.6-py3-none-any.whl (29 kB)\r\n", - "Using cached openai-1.12.0-py3-none-any.whl (226 kB)\r\n", - "Using cached tiktoken-0.6.0-cp310-cp310-macosx_11_0_arm64.whl (949 kB)\r\n", - "Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\r\n", - "Downloading gradio-4.19.1-py3-none-any.whl (16.9 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m16.9/16.9 MB\u001b[0m \u001b[31m23.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading gradio_client-0.10.0-py3-none-any.whl (307 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m307.7/307.7 kB\u001b[0m \u001b[31m26.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached tomlkit-0.12.0-py3-none-any.whl (37 kB)\r\n", - "Downloading graphdatascience-1.9-py3-none-any.whl (1.6 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m29.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading altair-5.2.0-py3-none-any.whl (996 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m996.9/996.9 kB\u001b[0m \u001b[31m37.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading neo4j_tools-0.5.1-py3-none-any.whl 
(8.0 kB)\r\n", - "Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\r\n", - "Downloading aiohttp-3.9.3-cp310-cp310-macosx_11_0_arm64.whl (387 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m387.4/387.4 kB\u001b[0m \u001b[31m26.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached async_timeout-4.0.3-py3-none-any.whl (5.7 kB)\r\n", - "Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)\r\n", - "Using cached distro-1.9.0-py3-none-any.whl (20 kB)\r\n", - "Downloading httpx-0.26.0-py3-none-any.whl (75 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.9/75.9 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading httpcore-1.0.3-py3-none-any.whl (77 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m77.0/77.0 kB\u001b[0m \u001b[31m8.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading huggingface_hub-0.20.3-py3-none-any.whl (330 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m330.1/330.1 kB\u001b[0m \u001b[31m21.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached importlib_resources-6.1.1-py3-none-any.whl (33 kB)\r\n", - "Using cached jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\r\n", - "Downloading langchain_community-0.0.21-py3-none-any.whl (1.7 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m36.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading langchain_core-0.1.25-py3-none-any.whl (242 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m242.1/242.1 kB\u001b[0m \u001b[31m20.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading langsmith-0.1.5-py3-none-any.whl (61 kB)\r\n", - 
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.0/61.0 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading matplotlib-3.8.3-cp310-cp310-macosx_11_0_arm64.whl (7.5 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m7.5/7.5 MB\u001b[0m \u001b[31m43.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading multimethod-1.11.1-py3-none-any.whl (10 kB)\r\n", - "Downloading numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl (14.0 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.0/14.0 MB\u001b[0m \u001b[31m39.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading orjson-3.9.14-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl (253 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m253.2/253.2 kB\u001b[0m \u001b[31m28.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached packaging-23.2-py3-none-any.whl (53 kB)\r\n", - "Downloading pandas-2.2.0-cp310-cp310-macosx_11_0_arm64.whl (11.8 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.8/11.8 MB\u001b[0m \u001b[31m49.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\r\n", - "\u001b[?25hDownloading pillow-10.2.0-cp310-cp310-macosx_11_0_arm64.whl (3.3 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.3/3.3 MB\u001b[0m \u001b[31m55.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\r\n", - "\u001b[?25hDownloading pyarrow-14.0.2-cp310-cp310-macosx_11_0_arm64.whl (24.0 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.0/24.0 MB\u001b[0m \u001b[31m52.2 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\r\n", - "\u001b[?25hDownloading pydantic-2.6.1-py3-none-any.whl (394 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m394.8/394.8 kB\u001b[0m \u001b[31m29.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading pydantic_core-2.16.2-cp310-cp310-macosx_11_0_arm64.whl (1.7 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m40.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\r\n", - "Downloading regex-2023.12.25-cp310-cp310-macosx_11_0_arm64.whl (291 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m291.0/291.0 kB\u001b[0m \u001b[31m28.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading ruff-0.2.2-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl (14.9 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.9/14.9 MB\u001b[0m \u001b[31m53.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading SQLAlchemy-2.0.27-cp310-cp310-macosx_11_0_arm64.whl (2.1 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.1/2.1 MB\u001b[0m \u001b[31m50.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hUsing cached tenacity-8.2.3-py3-none-any.whl (24 kB)\r\n", - "Downloading textdistance-4.6.1-py3-none-any.whl (31 kB)\r\n", - "Downloading torch-2.2.0-cp310-none-macosx_11_0_arm64.whl (59.7 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m59.7/59.7 MB\u001b[0m \u001b[31m58.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading tqdm-4.66.2-py3-none-any.whl (78 
kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.3/78.3 kB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading transformers-4.37.2-py3-none-any.whl (8.4 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.4/8.4 MB\u001b[0m \u001b[31m43.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hUsing cached typer-0.9.0-py3-none-any.whl (45 kB)\r\n", - "Downloading uvicorn-0.27.1-py3-none-any.whl (60 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.8/60.8 kB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading fastapi-0.109.2-py3-none-any.whl (92 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m92.1/92.1 kB\u001b[0m \u001b[31m10.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached nltk-3.8.1-py3-none-any.whl (1.5 MB)\r\n", - "Downloading scikit_learn-1.4.1.post1-cp310-cp310-macosx_12_0_arm64.whl (10.4 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m10.4/10.4 MB\u001b[0m \u001b[31m51.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m \u001b[36m0:00:01\u001b[0m\r\n", - "\u001b[?25hDownloading scipy-1.12.0-cp310-cp310-macosx_12_0_arm64.whl (31.4 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m31.4/31.4 MB\u001b[0m \u001b[31m62.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading sentencepiece-0.2.0-cp310-cp310-macosx_11_0_arm64.whl (1.2 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.2/1.2 MB\u001b[0m \u001b[31m55.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading toolz-0.12.1-py3-none-any.whl (56 kB)\r\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.1/56.1 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached aiosignal-1.3.1-py3-none-any.whl (7.6 kB)\r\n", - "Using cached annotated_types-0.6.0-py3-none-any.whl (12 kB)\r\n", - "Using cached click-8.1.7-py3-none-any.whl (97 kB)\r\n", - "Using cached colorama-0.4.6-py2.py3-none-any.whl (25 kB)\r\n", - "Using cached contourpy-1.2.0-cp310-cp310-macosx_11_0_arm64.whl (242 kB)\r\n", - "Using cached cycler-0.12.1-py3-none-any.whl (8.3 kB)\r\n", - "Downloading fonttools-4.49.0-cp310-cp310-macosx_10_9_universal2.whl (2.8 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.8/2.8 MB\u001b[0m \u001b[31m62.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading frozenlist-1.4.1-cp310-cp310-macosx_11_0_arm64.whl (52 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.2/52.2 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading fsspec-2024.2.0-py3-none-any.whl (170 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m170.9/170.9 kB\u001b[0m \u001b[31m17.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached h11-0.14.0-py3-none-any.whl (58 kB)\r\n", - "Using cached joblib-1.3.2-py3-none-any.whl (302 kB)\r\n", - "Using cached jsonpointer-2.4-py2.py3-none-any.whl (7.8 kB)\r\n", - "Using cached kiwisolver-1.4.5-cp310-cp310-macosx_11_0_arm64.whl (66 kB)\r\n", - "Downloading marshmallow-3.20.2-py3-none-any.whl (49 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.4/49.4 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading multidict-6.0.5-cp310-cp310-macosx_11_0_arm64.whl (30 kB)\r\n", - "Using cached pyparsing-3.1.1-py3-none-any.whl 
(103 kB)\r\n", - "Using cached rich-13.7.0-py3-none-any.whl (240 kB)\r\n", - "Downloading safetensors-0.4.2-cp310-cp310-macosx_11_0_arm64.whl (393 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m393.4/393.4 kB\u001b[0m \u001b[31m36.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached shellingham-1.5.4-py2.py3-none-any.whl (9.8 kB)\r\n", - "Downloading starlette-0.36.3-py3-none-any.whl (71 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.5/71.5 kB\u001b[0m \u001b[31m7.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading threadpoolctl-3.3.0-py3-none-any.whl (17 kB)\r\n", - "Downloading tokenizers-0.15.2-cp310-cp310-macosx_11_0_arm64.whl (2.4 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.4/2.4 MB\u001b[0m \u001b[31m63.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\r\n", - "Downloading tzdata-2024.1-py2.py3-none-any.whl (345 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m345.4/345.4 kB\u001b[0m \u001b[31m31.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl (121 kB)\r\n", - "Downloading yarl-1.9.4-cp310-cp310-macosx_11_0_arm64.whl (79 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m79.2/79.2 kB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading filelock-3.13.1-py3-none-any.whl (11 kB)\r\n", - "Downloading networkx-3.2.1-py3-none-any.whl (1.6 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m63.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hUsing cached sympy-1.12-py3-none-any.whl (5.7 
MB)\r\n", - "Using cached markdown_it_py-3.0.0-py3-none-any.whl (87 kB)\r\n", - "Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)\r\n", - "Using cached mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\r\n", - "Building wheels for collected packages: neo4j, ffmpy\r\n", - " Building wheel for neo4j (pyproject.toml) ... \u001b[?25ldone\r\n", - "\u001b[?25h Created wheel for neo4j: filename=neo4j-5.17.0-py3-none-any.whl size=273834 sha256=802f49a95a89aed5ee42402eaa7b4617e87dee75159f982c4a76a3e8f50e3c34\r\n", - " Stored in directory: /Users/zachblumenfeld/Library/Caches/pip/wheels/26/a1/15/63d729065b1a6a8afce3343003ca05bdbed2c4c05a707da4a3\r\n", - " Building wheel for ffmpy (setup.py) ... \u001b[?25ldone\r\n", - "\u001b[?25h Created wheel for ffmpy: filename=ffmpy-0.3.2-py3-none-any.whl size=5582 sha256=87828ad2036a15011e579cd9d1b78850057b81e5cf70d9b77afd73e5fd75e96d\r\n", - " Stored in directory: /Users/zachblumenfeld/Library/Caches/pip/wheels/bd/65/9a/671fc6dcde07d4418df0c592f8df512b26d7a0029c2a23dd81\r\n", - "Successfully built neo4j ffmpy\r\n", - "Installing collected packages: sentencepiece, pydub, mpmath, ffmpy, websockets, tzdata, tqdm, toolz, tomlkit, threadpoolctl, textdistance, tenacity, sympy, SQLAlchemy, shellingham, semantic-version, safetensors, ruff, regex, python-multipart, python-dotenv, pyparsing, pydantic-core, Pillow, packaging, orjson, numpy, networkx, neo4j, mypy-extensions, multimethod, multidict, mdurl, kiwisolver, jsonpointer, joblib, importlib-resources, h11, fsspec, frozenlist, fonttools, filelock, distro, cycler, colorama, click, async-timeout, annotated-types, aiofiles, yarl, uvicorn, typing-inspect, typer, torch, tiktoken, starlette, scipy, pydantic, pyarrow, pandas, nltk, marshmallow, markdown-it-py, jsonpatch, huggingface-hub, httpcore, contourpy, aiosignal, tokenizers, scikit-learn, rich, matplotlib, langsmith, httpx, graphdatascience, fastapi, dataclasses-json, aiohttp, transformers, openai, neo4j_tools, langchain-core, gradio-client, 
altair, sentence_transformers, langchain-openai, langchain-community, gradio, langchain\r\n", - " Attempting uninstall: packaging\r\n", - " Found existing installation: packaging 23.1\r\n", - " Uninstalling packaging-23.1:\r\n", - " Successfully uninstalled packaging-23.1\r\n", - "Successfully installed Pillow-10.2.0 SQLAlchemy-2.0.27 aiofiles-23.2.1 aiohttp-3.9.3 aiosignal-1.3.1 altair-5.2.0 annotated-types-0.6.0 async-timeout-4.0.3 click-8.1.7 colorama-0.4.6 contourpy-1.2.0 cycler-0.12.1 dataclasses-json-0.6.4 distro-1.9.0 fastapi-0.109.2 ffmpy-0.3.2 filelock-3.13.1 fonttools-4.49.0 frozenlist-1.4.1 fsspec-2024.2.0 gradio-4.19.1 gradio-client-0.10.0 graphdatascience-1.9 h11-0.14.0 httpcore-1.0.3 httpx-0.26.0 huggingface-hub-0.20.3 importlib-resources-6.1.1 joblib-1.3.2 jsonpatch-1.33 jsonpointer-2.4 kiwisolver-1.4.5 langchain-0.1.8 langchain-community-0.0.21 langchain-core-0.1.25 langchain-openai-0.0.6 langsmith-0.1.5 markdown-it-py-3.0.0 marshmallow-3.20.2 matplotlib-3.8.3 mdurl-0.1.2 mpmath-1.3.0 multidict-6.0.5 multimethod-1.11.1 mypy-extensions-1.0.0 neo4j-5.17.0 neo4j_tools-0.5.1 networkx-3.2.1 nltk-3.8.1 numpy-1.26.4 openai-1.12.0 orjson-3.9.14 packaging-23.2 pandas-2.2.0 pyarrow-14.0.2 pydantic-2.6.1 pydantic-core-2.16.2 pydub-0.25.1 pyparsing-3.1.1 python-dotenv-1.0.1 python-multipart-0.0.9 regex-2023.12.25 rich-13.7.0 ruff-0.2.2 safetensors-0.4.2 scikit-learn-1.4.1.post1 scipy-1.12.0 semantic-version-2.10.0 sentence_transformers-2.3.1 sentencepiece-0.2.0 shellingham-1.5.4 starlette-0.36.3 sympy-1.12 tenacity-8.2.3 textdistance-4.6.1 threadpoolctl-3.3.0 tiktoken-0.6.0 tokenizers-0.15.2 tomlkit-0.12.0 toolz-0.12.1 torch-2.2.0 tqdm-4.66.2 transformers-4.37.2 typer-0.9.0 typing-inspect-0.9.0 tzdata-2024.1 uvicorn-0.27.1 websockets-11.0.3 yarl-1.9.4\r\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Collecting vegafusion[embed]\r\n", - " Downloading vegafusion-1.6.4-py3-none-any.whl.metadata (1.3 kB)\r\n", - "Requirement already 
satisfied: altair>=4.2.0 in ./env/lib/python3.10/site-packages (from vegafusion[embed]) (5.2.0)\r\n", - "Requirement already satisfied: pyarrow>=5 in ./env/lib/python3.10/site-packages (from vegafusion[embed]) (14.0.2)\r\n", - "Requirement already satisfied: pandas in ./env/lib/python3.10/site-packages (from vegafusion[embed]) (2.2.0)\r\n", - "Requirement already satisfied: psutil in ./env/lib/python3.10/site-packages (from vegafusion[embed]) (5.9.0)\r\n", - "Collecting protobuf (from vegafusion[embed])\r\n", - " Downloading protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl.metadata (541 bytes)\r\n", - "Collecting vegafusion-python-embed==1.6.4 (from vegafusion[embed])\r\n", - " Downloading vegafusion_python_embed-1.6.4-cp38-abi3-macosx_11_0_arm64.whl.metadata (394 bytes)\r\n", - "Collecting vl-convert-python>=0.7.0 (from vegafusion[embed])\r\n", - " Downloading vl_convert_python-1.2.3-cp37-abi3-macosx_11_0_arm64.whl.metadata (5.2 kB)\r\n", - "Requirement already satisfied: jinja2 in ./env/lib/python3.10/site-packages (from altair>=4.2.0->vegafusion[embed]) (3.1.3)\r\n", - "Requirement already satisfied: jsonschema>=3.0 in ./env/lib/python3.10/site-packages (from altair>=4.2.0->vegafusion[embed]) (4.19.2)\r\n", - "Requirement already satisfied: numpy in ./env/lib/python3.10/site-packages (from altair>=4.2.0->vegafusion[embed]) (1.26.4)\r\n", - "Requirement already satisfied: packaging in ./env/lib/python3.10/site-packages (from altair>=4.2.0->vegafusion[embed]) (23.2)\r\n", - "Requirement already satisfied: toolz in ./env/lib/python3.10/site-packages (from altair>=4.2.0->vegafusion[embed]) (0.12.1)\r\n", - "Requirement already satisfied: typing-extensions>=4.0.1 in ./env/lib/python3.10/site-packages (from altair>=4.2.0->vegafusion[embed]) (4.9.0)\r\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in ./env/lib/python3.10/site-packages (from pandas->vegafusion[embed]) (2.8.2)\r\n", - "Requirement already satisfied: pytz>=2020.1 in 
./env/lib/python3.10/site-packages (from pandas->vegafusion[embed]) (2023.3.post1)\r\n", - "Requirement already satisfied: tzdata>=2022.7 in ./env/lib/python3.10/site-packages (from pandas->vegafusion[embed]) (2024.1)\r\n", - "Requirement already satisfied: attrs>=22.2.0 in ./env/lib/python3.10/site-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion[embed]) (23.1.0)\r\n", - "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in ./env/lib/python3.10/site-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion[embed]) (2023.7.1)\r\n", - "Requirement already satisfied: referencing>=0.28.4 in ./env/lib/python3.10/site-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion[embed]) (0.30.2)\r\n", - "Requirement already satisfied: rpds-py>=0.7.1 in ./env/lib/python3.10/site-packages (from jsonschema>=3.0->altair>=4.2.0->vegafusion[embed]) (0.10.6)\r\n", - "Requirement already satisfied: six>=1.5 in ./env/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->vegafusion[embed]) (1.16.0)\r\n", - "Requirement already satisfied: MarkupSafe>=2.0 in ./env/lib/python3.10/site-packages (from jinja2->altair>=4.2.0->vegafusion[embed]) (2.1.3)\r\n", - "Downloading vegafusion_python_embed-1.6.4-cp38-abi3-macosx_11_0_arm64.whl (17.7 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.7/17.7 MB\u001b[0m \u001b[31m5.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\r\n", - "\u001b[?25hDownloading vl_convert_python-1.2.3-cp37-abi3-macosx_11_0_arm64.whl (25.4 MB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m25.4/25.4 MB\u001b[0m \u001b[31m45.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\r\n", - "\u001b[?25hDownloading protobuf-4.25.3-cp37-abi3-macosx_10_9_universal2.whl (394 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m394.2/394.2 kB\u001b[0m \u001b[31m37.7 
MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hDownloading vegafusion-1.6.4-py3-none-any.whl (52 kB)\r\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m52.5/52.5 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\r\n", - "\u001b[?25hInstalling collected packages: vegafusion-python-embed, vl-convert-python, protobuf, vegafusion\r\n", - "Successfully installed protobuf-4.25.3 vegafusion-1.6.4 vegafusion-python-embed-1.6.4 vl-convert-python-1.2.3\r\n", - "Note: you may need to restart the kernel to use updated packages.\n" + "Collecting sentence_transformers\n", + " Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m227.1/227.1 kB\u001B[0m \u001B[31m1.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting langchain\n", + " Downloading langchain-0.2.5-py3-none-any.whl (974 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m974.6/974.6 kB\u001B[0m \u001B[31m10.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting langchain-openai\n", + " Downloading langchain_openai-0.1.9-py3-none-any.whl (40 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m40.3/40.3 kB\u001B[0m \u001B[31m4.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting langchain_community\n", + " Downloading langchain_community-0.2.5-py3-none-any.whl (2.2 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m2.2/2.2 MB\u001B[0m \u001B[31m32.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting openai\n", + " Downloading openai-1.35.3-py3-none-any.whl (327 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m327.4/327.4 kB\u001B[0m \u001B[31m27.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + 
"\u001B[?25hCollecting tiktoken\n", + " Downloading tiktoken-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m1.1/1.1 MB\u001B[0m \u001B[31m38.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting python-dotenv\n", + " Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", + "Collecting gradio\n", + " Downloading gradio-4.36.1-py3-none-any.whl (12.3 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m12.3/12.3 MB\u001B[0m \u001B[31m46.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting graphdatascience\n", + " Downloading graphdatascience-1.10-py3-none-any.whl (1.6 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m1.6/1.6 MB\u001B[0m \u001B[31m63.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: altair in /usr/local/lib/python3.10/dist-packages (4.2.2)\n", + "Collecting neo4j_tools\n", + " Downloading neo4j_tools-0.5.1-py3-none-any.whl (8.0 kB)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.34.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.41.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.66.4)\n", + "Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (2.3.0+cu121)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.25.2)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.2.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.11.4)\n", + "Requirement already satisfied: huggingface-hub>=0.15.1 
in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (0.23.4)\n", + "Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (9.4.0)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (6.0.1)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.0.30)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.10/dist-packages (from langchain) (3.9.5)\n", + "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (4.0.3)\n", + "Collecting langchain-core<0.3.0,>=0.2.7 (from langchain)\n", + " Downloading langchain_core-0.2.9-py3-none-any.whl (321 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m321.8/321.8 kB\u001B[0m \u001B[31m25.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)\n", + " Downloading langchain_text_splitters-0.2.1-py3-none-any.whl (23 kB)\n", + "Collecting langsmith<0.2.0,>=0.1.17 (from langchain)\n", + " Downloading langsmith-0.1.81-py3-none-any.whl (127 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m127.1/127.1 kB\u001B[0m \u001B[31m14.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: pydantic<3,>=1 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.7.4)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.10/dist-packages (from langchain) (2.31.0)\n", + "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /usr/local/lib/python3.10/dist-packages (from langchain) (8.4.1)\n", + "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)\n", + " Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", + 
"Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai) (3.7.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai) (1.7.0)\n", + "Collecting httpx<1,>=0.23.0 (from openai)\n", + " Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m75.6/75.6 kB\u001B[0m \u001B[31m9.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai) (1.3.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /usr/local/lib/python3.10/dist-packages (from openai) (4.12.2)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.10/dist-packages (from tiktoken) (2024.5.15)\n", + "Collecting aiofiles<24.0,>=22.0 (from gradio)\n", + " Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)\n", + "Collecting fastapi (from gradio)\n", + " Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m92.0/92.0 kB\u001B[0m \u001B[31m10.7 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting ffmpy (from gradio)\n", + " Downloading ffmpy-0.3.2.tar.gz (5.5 kB)\n", + " Preparing metadata (setup.py) ... 
\u001B[?25l\u001B[?25hdone\n", + "Collecting gradio-client==1.0.1 (from gradio)\n", + " Downloading gradio_client-1.0.1-py3-none-any.whl (318 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m318.1/318.1 kB\u001B[0m \u001B[31m25.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: importlib-resources<7.0,>=1.3 in /usr/local/lib/python3.10/dist-packages (from gradio) (6.4.0)\n", + "Requirement already satisfied: jinja2<4.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.1.4)\n", + "Requirement already satisfied: markupsafe~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.1.5)\n", + "Requirement already satisfied: matplotlib~=3.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (3.7.1)\n", + "Collecting orjson~=3.0 (from gradio)\n", + " Downloading orjson-3.10.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m145.0/145.0 kB\u001B[0m \u001B[31m12.5 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from gradio) (24.1)\n", + "Requirement already satisfied: pandas<3.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.0.3)\n", + "Collecting pydub (from gradio)\n", + " Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)\n", + "Collecting python-multipart>=0.0.9 (from gradio)\n", + " Downloading python_multipart-0.0.9-py3-none-any.whl (22 kB)\n", + "Collecting ruff>=0.2.2 (from gradio)\n", + " Downloading ruff-0.4.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.0 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m10.0/10.0 MB\u001B[0m \u001B[31m54.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting semantic-version~=2.0 (from gradio)\n", + " Downloading 
semantic_version-2.10.0-py2.py3-none-any.whl (15 kB)\n", + "Collecting tomlkit==0.12.0 (from gradio)\n", + " Downloading tomlkit-0.12.0-py3-none-any.whl (37 kB)\n", + "Requirement already satisfied: typer<1.0,>=0.12 in /usr/local/lib/python3.10/dist-packages (from gradio) (0.12.3)\n", + "Requirement already satisfied: urllib3~=2.0 in /usr/local/lib/python3.10/dist-packages (from gradio) (2.0.7)\n", + "Collecting uvicorn>=0.14.0 (from gradio)\n", + " Downloading uvicorn-0.30.1-py3-none-any.whl (62 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m62.4/62.4 kB\u001B[0m \u001B[31m7.8 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from gradio-client==1.0.1->gradio) (2023.6.0)\n", + "Collecting websockets<12.0,>=10.0 (from gradio-client==1.0.1->gradio)\n", + " Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m129.9/129.9 kB\u001B[0m \u001B[31m12.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting multimethod<2.0,>=1.0 (from graphdatascience)\n", + " Downloading multimethod-1.11.2-py3-none-any.whl (10 kB)\n", + "Collecting neo4j<6.0,>=4.4.2 (from graphdatascience)\n", + " Downloading neo4j-5.21.0-py3-none-any.whl (286 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m286.8/286.8 kB\u001B[0m \u001B[31m27.1 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: pyarrow<16.0,>=11.0 in /usr/local/lib/python3.10/dist-packages (from graphdatascience) (14.0.2)\n", + "Collecting textdistance<5.0,>=4.0 (from graphdatascience)\n", + " Downloading textdistance-4.6.2-py3-none-any.whl (31 kB)\n", + "Requirement already satisfied: entrypoints in 
/usr/local/lib/python3.10/dist-packages (from altair) (0.4)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair) (4.19.2)\n", + "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair) (0.12.1)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (23.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.4.1)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.5)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.9.4)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (3.7)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai) (1.2.1)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)\n", + " Downloading marshmallow-3.21.3-py3-none-any.whl (49 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m49.2/49.2 kB\u001B[0m \u001B[31m5.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.10/dist-packages (from httpx<1,>=0.23.0->openai) (2024.6.2)\n", + "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)\n", + " 
Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m77.9/77.9 kB\u001B[0m \u001B[31m8.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)\n", + " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m58.3/58.3 kB\u001B[0m \u001B[31m6.2 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.15.1->sentence_transformers) (3.15.1)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair) (0.18.1)\n", + "Collecting jsonpatch<2.0,>=1.33 (from langchain-core<0.3.0,>=0.2.7->langchain)\n", + " Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.2.1)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (4.53.0)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (1.4.5)\n", + "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (3.1.2)\n", + "Requirement already 
satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib~=3.0->gradio) (2.8.2)\n", + "Requirement already satisfied: pytz in /usr/local/lib/python3.10/dist-packages (from neo4j<6.0,>=4.4.2->graphdatascience) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.18.4 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1->langchain) (2.18.4)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests<3,>=2->langchain) (3.3.2)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.0.3)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence_transformers) (1.12.1)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence_transformers) (3.3)\n", + "Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)\n", + "Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)\n", + "Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)\n", + "Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 
MB)\n", + "Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)\n", + "Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)\n", + "Collecting nvidia-curand-cu12==10.3.2.106 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)\n", + "Collecting nvidia-cusolver-cu12==11.4.5.107 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl (124.2 MB)\n", + "Collecting nvidia-cusparse-cu12==12.1.0.106 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl (196.0 MB)\n", + "Collecting nvidia-nccl-cu12==2.20.5 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)\n", + "Collecting nvidia-nvtx-cu12==12.1.105 (from torch>=1.11.0->sentence_transformers)\n", + " Using cached nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (99 kB)\n", + "Requirement already satisfied: triton==2.3.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.11.0->sentence_transformers) (2.3.0)\n", + "Collecting nvidia-nvjitlink-cu12 (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence_transformers)\n", + " Downloading nvidia_nvjitlink_cu12-12.5.40-py3-none-manylinux2014_x86_64.whl (21.3 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m21.3/21.3 MB\u001B[0m \u001B[31m40.1 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: tokenizers<0.20,>=0.19 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (0.19.1)\n", 
+ "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.34.0->sentence_transformers) (0.4.3)\n", + "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (8.1.7)\n", + "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n", + "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.10/dist-packages (from typer<1.0,>=0.12->gradio) (13.7.1)\n", + "Collecting starlette<0.38.0,>=0.37.2 (from fastapi->gradio)\n", + " Downloading starlette-0.37.2-py3-none-any.whl (71 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m71.9/71.9 kB\u001B[0m \u001B[31m9.0 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting fastapi-cli>=0.0.2 (from fastapi->gradio)\n", + " Downloading fastapi_cli-0.0.4-py3-none-any.whl (9.5 kB)\n", + "Collecting ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 (from fastapi->gradio)\n", + " Downloading ujson-5.10.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (53 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m53.6/53.6 kB\u001B[0m \u001B[31m6.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting email_validator>=2.0.0 (from fastapi->gradio)\n", + " Downloading email_validator-2.2.0-py3-none-any.whl (33 kB)\n", + "Requirement already satisfied: joblib>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence_transformers) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence_transformers) (3.5.0)\n", + "Collecting dnspython>=2.0.0 (from email_validator>=2.0.0->fastapi->gradio)\n", + " Downloading dnspython-2.6.1-py3-none-any.whl (307 kB)\n", + "\u001B[2K 
\u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m307.7/307.7 kB\u001B[0m \u001B[31m29.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain-core<0.3.0,>=0.2.7->langchain)\n", + " Downloading jsonpointer-3.0.0-py2.py3-none-any.whl (7.6 kB)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.16.1)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain_community)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Collecting httptools>=0.5.0 (from uvicorn>=0.14.0->gradio)\n", + " Downloading httptools-0.6.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (341 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m341.4/341.4 kB\u001B[0m \u001B[31m27.4 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting uvloop!=0.15.0,!=0.15.1,>=0.14.0 (from uvicorn>=0.14.0->gradio)\n", + " Downloading uvloop-0.19.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.4 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m3.4/3.4 MB\u001B[0m \u001B[31m75.3 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting watchfiles>=0.13 (from uvicorn>=0.14.0->gradio)\n", + " Downloading watchfiles-0.22.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n", + "\u001B[2K 
\u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m1.2/1.2 MB\u001B[0m \u001B[31m58.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: mpmath<1.4.0,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.11.0->sentence_transformers) (1.3.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n", + "Building wheels for collected packages: ffmpy\n", + " Building wheel for ffmpy (setup.py) ... \u001B[?25l\u001B[?25hdone\n", + " Created wheel for ffmpy: filename=ffmpy-0.3.2-py3-none-any.whl size=5584 sha256=afa0d81b13ea748af2cea319391efadf13909d8477fea0acdac97dce23286963\n", + " Stored in directory: /root/.cache/pip/wheels/bd/65/9a/671fc6dcde07d4418df0c592f8df512b26d7a0029c2a23dd81\n", + "Successfully built ffmpy\n", + "Installing collected packages: pydub, ffmpy, websockets, uvloop, ujson, tomlkit, textdistance, semantic-version, ruff, python-multipart, python-dotenv, orjson, nvidia-nvtx-cu12, nvidia-nvjitlink-cu12, nvidia-nccl-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, neo4j, mypy-extensions, multimethod, marshmallow, jsonpointer, httptools, h11, dnspython, aiofiles, watchfiles, uvicorn, typing-inspect, tiktoken, starlette, nvidia-cusparse-cu12, nvidia-cudnn-cu12, jsonpatch, httpcore, email_validator, nvidia-cusolver-cu12, langsmith, httpx, graphdatascience, dataclasses-json, openai, neo4j_tools, langchain-core, gradio-client, fastapi-cli, sentence_transformers, langchain-text-splitters, langchain-openai, fastapi, langchain, gradio, langchain_community\n", + "Successfully installed aiofiles-23.2.1 dataclasses-json-0.6.7 dnspython-2.6.1 email_validator-2.2.0 fastapi-0.111.0 fastapi-cli-0.0.4 ffmpy-0.3.2 gradio-4.36.1 gradio-client-1.0.1 graphdatascience-1.10 h11-0.14.0 
httpcore-1.0.5 httptools-0.6.1 httpx-0.27.0 jsonpatch-1.33 jsonpointer-3.0.0 langchain-0.2.5 langchain-core-0.2.9 langchain-openai-0.1.9 langchain-text-splitters-0.2.1 langchain_community-0.2.5 langsmith-0.1.81 marshmallow-3.21.3 multimethod-1.11.2 mypy-extensions-1.0.0 neo4j-5.21.0 neo4j_tools-0.5.1 nvidia-cublas-cu12-12.1.3.1 nvidia-cuda-cupti-cu12-12.1.105 nvidia-cuda-nvrtc-cu12-12.1.105 nvidia-cuda-runtime-cu12-12.1.105 nvidia-cudnn-cu12-8.9.2.26 nvidia-cufft-cu12-11.0.2.54 nvidia-curand-cu12-10.3.2.106 nvidia-cusolver-cu12-11.4.5.107 nvidia-cusparse-cu12-12.1.0.106 nvidia-nccl-cu12-2.20.5 nvidia-nvjitlink-cu12-12.5.40 nvidia-nvtx-cu12-12.1.105 openai-1.35.3 orjson-3.10.5 pydub-0.25.1 python-dotenv-1.0.1 python-multipart-0.0.9 ruff-0.4.10 semantic-version-2.10.0 sentence_transformers-3.0.1 starlette-0.37.2 textdistance-4.6.2 tiktoken-0.7.0 tomlkit-0.12.0 typing-inspect-0.9.0 ujson-5.10.0 uvicorn-0.30.1 uvloop-0.19.0 watchfiles-0.22.0 websockets-11.0.3\n", + "Collecting vegafusion[embed]\n", + " Downloading vegafusion-1.6.9-py3-none-any.whl (54 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m54.5/54.5 kB\u001B[0m \u001B[31m698.2 kB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting altair>=5.2.0 (from vegafusion[embed])\n", + " Downloading altair-5.3.0-py3-none-any.whl (857 kB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m857.8/857.8 kB\u001B[0m \u001B[31m5.6 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: pyarrow>=5 in /usr/local/lib/python3.10/dist-packages (from vegafusion[embed]) (14.0.2)\n", + "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from vegafusion[embed]) (2.0.3)\n", + "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from vegafusion[embed]) (5.9.5)\n", + "Requirement already satisfied: protobuf in 
/usr/local/lib/python3.10/dist-packages (from vegafusion[embed]) (3.20.3)\n", + "Collecting vegafusion-python-embed==1.6.9 (from vegafusion[embed])\n", + " Downloading vegafusion_python_embed-1.6.9-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25.1 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m25.1/25.1 MB\u001B[0m \u001B[31m33.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hCollecting vl-convert-python>=0.7.0 (from vegafusion[embed])\n", + " Downloading vl_convert_python-1.5.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.5 MB)\n", + "\u001B[2K \u001B[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001B[0m \u001B[32m29.5/29.5 MB\u001B[0m \u001B[31m11.9 MB/s\u001B[0m eta \u001B[36m0:00:00\u001B[0m\n", + "\u001B[?25hRequirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion[embed]) (3.1.4)\n", + "Requirement already satisfied: jsonschema>=3.0 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion[embed]) (4.19.2)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion[embed]) (1.25.2)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion[embed]) (24.1)\n", + "Requirement already satisfied: toolz in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion[embed]) (0.12.1)\n", + "Requirement already satisfied: typing-extensions>=4.0.1 in /usr/local/lib/python3.10/dist-packages (from altair>=5.2.0->vegafusion[embed]) (4.12.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion[embed]) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->vegafusion[embed]) (2023.4)\n", + "Requirement already satisfied: tzdata>=2022.1 in 
/usr/local/lib/python3.10/dist-packages (from pandas->vegafusion[embed]) (2024.1)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion[embed]) (23.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion[embed]) (2023.12.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion[embed]) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=3.0->altair>=5.2.0->vegafusion[embed]) (0.18.1)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->vegafusion[embed]) (1.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->altair>=5.2.0->vegafusion[embed]) (2.1.5)\n", + "Installing collected packages: vegafusion-python-embed, vl-convert-python, altair, vegafusion\n", + " Attempting uninstall: altair\n", + " Found existing installation: altair 4.2.2\n", + " Uninstalling altair-4.2.2:\n", + " Successfully uninstalled altair-4.2.2\n", + "Successfully installed altair-5.3.0 vegafusion-1.6.9 vegafusion-python-embed-1.6.9 vl-convert-python-1.5.0\n" ] } ], @@ -462,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 3, "metadata": { "id": "7psF1otOdyXe" }, @@ -477,7 +349,7 @@ "from langchain_openai import OpenAIEmbeddings, ChatOpenAI\n", "from langchain.vectorstores.neo4j_vector import Neo4jVector\n", "from langchain.graphs import Neo4jGraph\n", - "from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate\n", + "from langchain.prompts import PromptTemplate\n", "from langchain.schema import 
StrOutputParser\n", "from langchain.schema.runnable import RunnableLambda\n", "import gradio as gr\n", @@ -497,21 +369,20 @@ "### Setup Credentials and Environment Variables\n", "\n", "There are two things you need here.\n", - "1. Start a blank [Neo4j Sandbox](https://sandbox.neo4j.com/). Get your URI and password and plug them in below. Do not change the Neo4j username.\n", - "2. Get your OpenAI API key. You can use [this one](https://docs.google.com/document/d/19Lqjd0MqRs088KUVnd23ZrVU9G0OAg-53U72VrFwwms/edit) if you do not have one already\n", + "1. Start a blank Graph Data Science [Neo4j Sandbox](https://sandbox.neo4j.com/). Get your URI and password and plug them in below. Do not change the Neo4j username.\n", + "2. Get your OpenAI API key. You can use [this one](https://docs.google.com/document/d/19Lqjd0MqRs088KUVnd23ZrVU9G0OAg-53U72VrFwwms/edit) if you do not have one already.\n", "\n", "To make this easy, you can write the credentials and env variables directly into the below cell." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "id": "BQ9s0ZWhekd8" }, "outputs": [], "source": [ - "from dotenv import load_dotenv\n", "import os\n", "\n", "# Neo4j\n", @@ -521,15 +392,14 @@ "AURA_DS = False\n", "\n", "# AI\n", - "LLM = 'gpt-4'\n", - "\n", - "# OpenAI - Required when using OpenAI models\n", - "os.environ['OPENAI_API_KEY'] = 'sk-...' #change this" + "LLM = 'gpt-4o'\n", + "os.environ['OPENAI_API_KEY'] = 'sk-...'
#change this\n", + "OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "id": "o-98NuINdyXe" }, @@ -549,7 +419,8 @@ " AURA_DS = eval(os.getenv('AURA_DS').title())\n", "\n", " # AI\n", - " LLM = os.getenv('LLM')" + " LLM = 'gpt-4o'\n", + " OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')" ] }, { @@ -586,11 +457,24 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": { - "id": "92GFeMaRdyXf" + "id": "92GFeMaRdyXf", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "cbf495c5-13d2-4483-e814-ac66afd28d26" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. ('advertisedListenAddress' returned by 'gds.debug.arrow' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.debug.arrow()'\n", + "WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. 
('serverLocation' returned by 'gds.debug.arrow' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.debug.arrow()'\n" + ] + } + ], "source": [ "from graphdatascience import GraphDataScience\n", "\n", @@ -616,20 +500,39 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 423 }, "id": "QJ6qg0qMw5JO", - "outputId": "378854bf-e03c-4e62-f89c-589c68d8955f" + "outputId": "e4f26594-384d-479f-9672-a55003995d27" }, "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + " key value\n", + "0 gdsVersion 2.6.5\n", + "1 gdsEdition Unlicensed\n", + "2 neo4jVersion 5.19.0\n", + "3 minimumRequiredJavaVersion 11\n", + "4 unavailableCompatibility Neo4j 4.4\n", + ".. ... ...\n", + "97 server.memory.pagecache.size 536870912\n", + "98 server.memory.off_heap.transaction_max_size 2147483648\n", + "99 dbms.memory.transaction.total.max 8589934592\n", + "100 db.memory.transaction.total.max 0\n", + "101 db.memory.transaction.max 0\n", + "\n", + "[102 rows x 2 columns]" + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "text/plain": [ - " key value\n", - "0 gdsVersion 2.6.0\n", - "1 gdsEdition Unlicensed\n", - "2 neo4jVersion 5.16.0\n", - "3 minimumRequiredJavaVersion 11\n", - "4 unavailableCompatibility Neo4j 4.4\n", - ".. ... ...\n", - "94 server.memory.pagecache.size 536870912\n", - "95 server.memory.off_heap.transaction_max_size 2147483648\n", - "96 dbms.memory.transaction.total.max 375809638\n", - "97 db.memory.transaction.total.max 0\n", - "98 db.memory.transaction.max 0\n", - "\n", - "[99 rows x 2 columns]" - ] + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"gds\",\n \"rows\": 102,\n \"fields\": [\n {\n \"column\": \"key\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 92,\n \"samples\": [\n \"poolCodeheapProfiledNmethodsUsed\",\n \"heapFree\",\n \"poolG1OldGenTotalInBytes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"value\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 79,\n \"samples\": [\n 680591360,\n \"2.6.5\",\n 2147483648\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } }, - "execution_count": 7, "metadata": {}, - "output_type": "execute_result" + "execution_count": 6 } ], "source": [ @@ -761,12 +860,12 @@ "height": 1000 }, "id": "vuweMfqpdyXf", - "outputId": "449362a2-f73f-423c-bfc3-4883d7cd4659" + "outputId": "97659f43-92a4-48ce-ecca-cbe9fc1cb6d3" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "====== loading Department nodes ======\n", "staging 266 records\n", @@ -861,14 +960,22 @@ "Loaded 10,000 of 23,199 relationships\n", "Loaded 20,000 of 23,199 relationships\n", "Loaded 23,199 of 23,199 relationships\n", - "CPU times: user 1.16 s, sys: 52.6 ms, total: 1.21 s\n", - "Wall time: 1min 25s\n" + "CPU times: user 2.59 s, sys: 61.7 ms, total: 2.65 s\n", + "Wall time: 7.36 s\n" ] }, { + "output_type": "execute_result", "data": { + 
"text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + "
\n" ], - "text/plain": [ - " propertySetCount\n", - "0 8044" - ] + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"get_ipython()\",\n \"rows\": 0,\n \"fields\": []\n}" + } }, - "execution_count": 8, "metadata": {}, - "output_type": "execute_result" + "execution_count": 8 } ], "source": [ @@ -950,6 +1133,13 @@ "SET r.tDat = date(r.tDat)\n", "''')\n", "\n", + "# convert NaN product descriptions\n", + "gds.run_cypher('''\n", + "MATCH (n:Product) WHERE valueType(n.detailDesc) <> \"STRING NOT NULL\"\n", + "SET n.detailDesc = \"\"\n", + "RETURN n\n", + "''')\n", + "\n", "# create combined text property. This will help simplify later with semantic search and RAG\n", "gds.run_cypher(\"\"\"\n", " MATCH(p:Product)\n", @@ -960,7 +1150,13 @@ " 'Garment Type: ' + p.garmentGroupName + '\\n' +\n", " 'Description: ' + p.detailDesc\n", " RETURN count(p) AS propertySetCount\n", - " \"\"\")" + " \"\"\")\n", + "\n", + "# write dummy urls to illustrate sourcing in future retrieval\n", + "gds.run_cypher(\"\"\"\n", + "MATCH(p:Product)\n", + "SET p.url = 'https://representative-domain/product/' + p.productCode\n", + "\"\"\")" ] }, { @@ -982,61 +1178,44 @@ "id": "-WrlFCN1dyXg" }, "source": [ - "### Creating Text Embeddings\n", + "### Creating Text Embeddings & Vector Index\n", "\n", - "To start we need to make embeddings for our product nodes." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false, - "id": "zuqGNOqTZACF" - }, - "source": [ - "First, we will instantiate our embedding model. This notebook has just been tested with OpenAI, but these models are pluggable. You could choose embedding models from other providers, including cloud providers like AWS Bedrock and Vertex AI Generative AI." 
- ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "id": "KwRdCBawdyXg" - }, - "outputs": [], - "source": [ - "from langchain_openai import OpenAIEmbeddings\n", + "To start we need to generate embeddings for our product text.\n", "\n", - "embedding_model = OpenAIEmbeddings()\n", - "embedding_dimension = 1536" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": false, - "id": "FzzEFpJPZACF" - }, - "source": [ - "Now let's create a dataframe with a text column to embed. In this case, we will combine multiple text columns, such as product name, type, description, etc. This provides the embedding model with more context. Some products are missing a description (a small minority). For our intents and purposes we will leave them out. In a more in-depth workflow, you would likely want to impute the missing values." + "Neo4j has native integrations with popular embedding APIs (OpenAI, Vertex AI, Amazon Bedrock, Azure OpenAI) making it possible to generate embeddings with a single Cypher query using `genai.vector.*` operations*.\n", + "\n", + "The below query embeds the Product text property with OpenAI `text-embedding-ada-002` in batches. Specifically it\n", + "1. Matches every Product that has a detailed description\n", + "2. Uses the `collect` aggregation function to batch products into a set number of partitions\n", + "3. Encodes the text property in batches using OpenAI `text-embedding-ada-002`\n", + "4. Sets the embedding as a vector property using `db.create.setNodeVectorProperty`. This special function is used to set the properties as floats rather than double precision, which requires more space. This becomes important as these embedding vectors tend to be long, and the size can add up quickly.\n", + "\n", + "*NOTE: `genai.vector.*` operations are not available in Neo4j Community Edition. For Neo4j Community Edition you will need to generate embeddings externally and ingest them into Neo4j." 
] }, { - "cell_type": "code", - "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 770 + "height": 53 }, - "id": "36VCtp9HdyXh", - "outputId": "81d33bc8-c646-49f6-aa85-5eaace2434b0" + "id": "Rh3Zomw0dzh4", + "outputId": "a0655f99-2b21-40bd-fc20-ee473e5e8728" }, + "cell_type": "code", "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + "
\n" ], - "text/plain": [ - " productCode \\\n", - "0 108775 \n", - "1 110065 \n", - "2 111565 \n", - "3 111586 \n", - "4 111593 \n", - "... ... \n", - "8039 936862 \n", - "8040 936979 \n", - "8041 937138 \n", - "8042 942187 \n", - "8043 952938 \n", - "\n", - " text \n", - "0 \\n##Product\\nName: Strap top\\nType: Vest top\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Jersey top with narrow shoulder straps.\\n \n", - "1 \\n##Product\\nName: OP T-shirt (Idro)\\nType: Bra\\nGroup: Underwear\\nGarment Type: Under-, Nightwear\\nDescription: Microfibre T-shirt bra with underwired, moulded, lightly padded cups that shape the bust and provide good support. Narrow adjustable shoulder straps and a narrow hook-and-eye fastening at the back. Without visible seams for greater comfort.\\n \n", - "2 \\n##Product\\nName: 20 den 1p Stockings\\nType: Underwear Tights\\nGroup: Socks & Tights\\nGarment Type: Socks and Tights\\nDescription: Semi shiny nylon stockings with a wide, reinforced trim at the top. Use with a suspender belt. 20 denier.\\n \n", - "3 \\n##Product\\nName: Shape Up 30 den 1p Tights\\nType: Leggings/Tights\\nGroup: Garment Lower body\\nGarment Type: Socks and Tights\\nDescription: Tights with built-in support to lift the bottom. Black in 30 denier and light amber in 15 denier.\\n \n", - "4 \\n##Product\\nName: Support 40 den 1p Tights\\nType: Underwear Tights\\nGroup: Socks & Tights\\nGarment Type: Socks and Tights\\nDescription: Semi shiny tights that shape the tummy, thighs and calves while also encouraging blood circulation in the legs. Elasticated waist.\\n \n", - "... ... \n", - "8039 \\n##Product\\nName: EDC Marla dress\\nType: Dress\\nGroup: Garment Full body\\nGarment Type: Special Offers\\nDescription: Calf-length dress in a patterned Tencel™ lyocell weave with a V-neck, sewn in wrapover at the top and decorative ties at one side. 3/4-length dolman sleeves with narrow, covered elastication at the cuffs. 
Gathered seam at the waist with concealed elastication and a flared skirt with a gathered tier at the hem for added width. Unlined.\\n \n", - "8040 \\n##Product\\nName: Class Filippa Necklace\\nType: Necklace\\nGroup: Accessories\\nGarment Type: Accessories\\nDescription: Metal chain necklace with a pendant. Adjustable length.\\n \n", - "8041 \\n##Product\\nName: Flirty Albin bracelet pk\\nType: Bracelet\\nGroup: Accessories\\nGarment Type: Accessories\\nDescription: Metal chain bracelets. Two plain and two with pendants. Adjustable length.\\n \n", - "8042 \\n##Product\\nName: ED Sasha tee\\nType: T-shirt\\nGroup: Garment Upper body\\nGarment Type: Jersey Fancy\\nDescription: Oversized, straight-cut T-shirt in a soft modal and cotton jersey blend with a ribbed neckline and low dropped shoulders.\\n \n", - "8043 \\n##Product\\nName: Elton top\\nType: Top\\nGroup: Garment Upper body\\nGarment Type: Jersey Fancy\\nDescription: Fitted top in jersey with a round neckline and extra-long sleeves. 
Additional draped layer at the front.\\n \n", - "\n", - "[8018 rows x 2 columns]" - ] + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"} IN TRANSACTIONS OF 1 ROW''', params={'token':OPENAI_API_KEY, 'numberOfBatches':100})\",\n \"rows\": 0,\n \"fields\": []\n}" + } }, - "execution_count": 10, "metadata": {}, - "output_type": "execute_result" + "execution_count": 12 } ], + "execution_count": 12, "source": [ - "pd.set_option('display.max_rows', 10)\n", - "pd.set_option('display.max_colwidth', 500)\n", - "pd.set_option('display.width', 0)\n", - "\n", - "product_emb_df = product_df[['productCode', 'prodName', 'productTypeName', 'productGroupName', 'garmentGroupName', 'detailDesc']]\n", - "product_emb_df = product_emb_df[product_emb_df.detailDesc.notnull()]\n", - "\n", - "def create_doc(row):\n", - " return f'''\n", - "##Product\n", - "Name: {row.prodName}\n", - "Type: {row.productTypeName}\n", - "Group: {row.productGroupName}\n", - "Garment Type: {row.garmentGroupName}\n", - "Description: {row.detailDesc}\n", - "'''\n", + "#generate embeddings\n", "\n", - "product_emb_df['text'] = product_emb_df.apply(create_doc, axis=1)\n", - "product_emb_df = product_emb_df.drop(columns=['prodName', 'productTypeName', 'productGroupName', 'garmentGroupName', 'detailDesc'])\n", - "product_emb_df" + "gds.run_cypher('''\n", + "MATCH (n:Product) WHERE size(n.detailDesc) <> 0\n", + "WITH collect(n) AS nodes, toInteger(rand()*$numberOfBatches) AS partition\n", + "CALL {\n", + " WITH nodes\n", + " CALL genai.vector.encodeBatch([node IN nodes| node.text], \"OpenAI\", { token: $token})\n", + " YIELD index, vector\n", + " CALL db.create.setNodeVectorProperty(nodes[index], \"textEmbedding\", vector)\n", + "} IN TRANSACTIONS OF 1 ROW''', params={'token':OPENAI_API_KEY, 'numberOfBatches':100})" ] }, { - "cell_type": "markdown", "metadata": { - "collapsed": false, - "id": "bLpMaWmkZACF" + "id": "t_ewNqGEdzh4" }, + "cell_type": "markdown", 
"source": [ - "Now let’s embed the text with OpenAI. We will chunk this into batches for efficiency." + "After generating embeddings we will create a vector index for them. The [Neo4j Vector Index](https://neo4j.com/docs/cypher-manual/current/indexes-for-vector-search/) enables efficient Approximate Nearest Neighbor (ANN) search with vectors. It uses the Hierarchical Navigable Small World (HNSW) algorithm.\n", + "\n", + "\n", + "The below cell will create the index, then, with a separate query, await for the index to come online, meaning it is ready to be used in vector search.\n" ] }, { - "cell_type": "code", - "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 53 }, - "id": "5qUudN0KdyXh", - "outputId": "1da33635-e206-4f14-bbb9-8b88120434f0" + "id": "PI0-WZvGdzh4", + "outputId": "cf21db15-b1b2-4f52-fb06-a21291697998" }, + "cell_type": "code", "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Embedded 500 of 8018\n", - "Embedded 1000 of 8018\n", - "Embedded 1500 of 8018\n", - "Embedded 2000 of 8018\n", - "Embedded 2500 of 8018\n", - "Embedded 3000 of 8018\n", - "Embedded 3500 of 8018\n", - "Embedded 4000 of 8018\n", - "Embedded 4500 of 8018\n", - "Embedded 5000 of 8018\n", - "Embedded 5500 of 8018\n", - "Embedded 6000 of 8018\n", - "Embedded 6500 of 8018\n", - "Embedded 7000 of 8018\n", - "Embedded 7500 of 8018\n", - "Embedded 8000 of 8018\n", - "Embedded 8018 of 8018\n", - "CPU times: user 6.78 s, sys: 230 ms, total: 7.01 s\n", - "Wall time: 27 s\n" - ] - }, - { + "output_type": "execute_result", "data": { + "text/plain": [ + "Empty DataFrame\n", + "Columns: []\n", + "Index: []" + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + "
\n" ], - "text/plain": [ - " productCode \\\n", - "0 108775 \n", - "1 110065 \n", - "2 111565 \n", - "3 111586 \n", - "4 111593 \n", - "... ... \n", - "8039 936862 \n", - "8040 936979 \n", - "8041 937138 \n", - "8042 942187 \n", - "8043 952938 \n", - "\n", - " text \\\n", - "0 \\n##Product\\nName: Strap top\\nType: Vest top\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Jersey top with narrow shoulder straps.\\n \n", - "1 \\n##Product\\nName: OP T-shirt (Idro)\\nType: Bra\\nGroup: Underwear\\nGarment Type: Under-, Nightwear\\nDescription: Microfibre T-shirt bra with underwired, moulded, lightly padded cups that shape the bust and provide good support. Narrow adjustable shoulder straps and a narrow hook-and-eye fastening at the back. Without visible seams for greater comfort.\\n \n", - "2 \\n##Product\\nName: 20 den 1p Stockings\\nType: Underwear Tights\\nGroup: Socks & Tights\\nGarment Type: Socks and Tights\\nDescription: Semi shiny nylon stockings with a wide, reinforced trim at the top. Use with a suspender belt. 20 denier.\\n \n", - "3 \\n##Product\\nName: Shape Up 30 den 1p Tights\\nType: Leggings/Tights\\nGroup: Garment Lower body\\nGarment Type: Socks and Tights\\nDescription: Tights with built-in support to lift the bottom. Black in 30 denier and light amber in 15 denier.\\n \n", - "4 \\n##Product\\nName: Support 40 den 1p Tights\\nType: Underwear Tights\\nGroup: Socks & Tights\\nGarment Type: Socks and Tights\\nDescription: Semi shiny tights that shape the tummy, thighs and calves while also encouraging blood circulation in the legs. Elasticated waist.\\n \n", - "... ... \n", - "8039 \\n##Product\\nName: EDC Marla dress\\nType: Dress\\nGroup: Garment Full body\\nGarment Type: Special Offers\\nDescription: Calf-length dress in a patterned Tencel™ lyocell weave with a V-neck, sewn in wrapover at the top and decorative ties at one side. 3/4-length dolman sleeves with narrow, covered elastication at the cuffs. 
Gathered seam at the waist with concealed elastication and a flared skirt with a gathered tier at the hem for added width. Unlined.\\n \n", - "8040 \\n##Product\\nName: Class Filippa Necklace\\nType: Necklace\\nGroup: Accessories\\nGarment Type: Accessories\\nDescription: Metal chain necklace with a pendant. Adjustable length.\\n \n", - "8041 \\n##Product\\nName: Flirty Albin bracelet pk\\nType: Bracelet\\nGroup: Accessories\\nGarment Type: Accessories\\nDescription: Metal chain bracelets. Two plain and two with pendants. Adjustable length.\\n \n", - "8042 \\n##Product\\nName: ED Sasha tee\\nType: T-shirt\\nGroup: Garment Upper body\\nGarment Type: Jersey Fancy\\nDescription: Oversized, straight-cut T-shirt in a soft modal and cotton jersey blend with a ribbed neckline and low dropped shoulders.\\n \n", - "8043 \\n##Product\\nName: Elton top\\nType: Top\\nGroup: Garment Upper body\\nGarment Type: Jersey Fancy\\nDescription: Fitted top in jersey with a round neckline and extra-long sleeves. Additional draped layer at the front.\\n \n", - "\n", - " textEmbedding \n", - "0 [-0.03175225707113605, 0.010736916046178453, -0.01710789586031919, -0.02766004773910761, 0.012160292759729692, 0.025730276446803743, -0.02367732842202476, -0.0026363268971752543, -0.0013421145057723066, -0.0319438636659682, -0.01532867566687212, 0.004119581370103521, -0.021446458657297864, -0.0016372257377681532, -0.012481921308447002, 0.012653000620952797, 0.011626526608563307, 0.005833792914914613, 0.003958767095744866, -0.020406299790024136, 0.012529823888477681, 0.024128977061981947, 0.0... 
\n", - "1 [-0.012565332302897267, 0.006915749474112066, -0.02248822849098327, -0.029920078783321327, 0.013865905731527375, 0.0055257183157612685, -0.010177505825736413, -0.009682049636285946, 0.0076795783870632905, -0.04621509745559271, 0.018731014087263194, 0.008236967531517697, -0.023520430127435252, -0.02204782278228894, -0.03223220828830526, 0.00807869670085184, -0.0015543233390761845, 0.02097433188828591, 0.018813590739720026, -0.010955097824537987, -0.005229820351882444, 0.012634144938181118, 0.... \n", - "2 [-0.004607602644290938, -0.00020654844478418472, -0.001998478179615022, -0.024342574406054824, 0.006394436618893359, -0.011269197042002268, -0.014905316684896495, -0.013781173159972387, 0.003337736250062193, -0.053958911548097475, -0.020997899104786104, -9.731088431201777e-05, -0.00738326689906045, -0.002747907584282141, -0.02088687242283662, 0.003750616199692916, 0.01246273247597543, -0.002135526711042585, -0.0006761344393854489, -0.04993419758705755, -0.012587636794676713, 0.02309352563110... \n", - "3 [-0.004574140987179611, -0.004500534380847251, 0.003792506246364087, -0.02551704751060567, -0.0010024553747197689, -0.005576596478314689, 0.0021854229316189662, -0.027592060067064855, 0.0051629961661446774, -0.04794961458102422, 0.0006475651658576645, 0.008867874746280617, -0.00979321813327877, 0.0016754324892777979, -0.030844782050131702, 0.0003310118281397553, 0.01605330798673457, -0.008398192988494885, 0.008538396136872949, -0.025110456331399834, 0.00993342221297931, 0.02035755637660432, ... 
\n", - "4 [-0.011162077685675415, 0.00277489112927228, 0.005810253639100346, -0.033645989346223495, 0.004191856594335809, -0.016572941998766574, -0.0028217760057120005, -0.03431279399428362, 0.0037889939005951046, -0.05239994364740206, -0.0018337202327463607, 0.004212694472418344, -0.010064623867683762, -0.011495480941028105, -0.02870050042979802, -0.010224379225558387, 0.021365619498812577, -0.017795422153930814, 0.011113456125369936, -0.02761693822008718, 0.006220062602642771, 0.012363719497095808, ... \n", - "... ... \n", - "8039 [-0.02881099786612282, 0.012809473772767501, -0.0067005519386992085, -0.015286064410282527, 6.599940260696032e-05, 0.011062102177775788, -0.03315878967707144, -0.031232551997158332, -0.0030613403639287657, -0.020459385191972743, 0.0026124584913229127, 0.02457327720548812, -0.013518053776115037, -0.0015117518776271578, -0.03775424064490382, 0.040313380919864056, -0.0016089236284562148, 0.002586660623675748, -0.013084650717229737, -0.00601261050886617, 0.0034483076801443188, 0.0104635933247421... \n", - "8040 [-0.016590671641800062, 0.012040743624614526, -0.025176715148243854, -0.04145639396746824, -0.0056214924065233, 0.007301517774561036, -0.022269628979580517, 0.010418184119156518, 0.004401192542139525, -0.020052131299228764, -0.003988792195194484, 0.0044282353557842545, 0.008592804908346904, 0.0031149762269503485, -0.010323534737061255, 0.0008788863793872006, 0.00986380969906858, 0.014373173030125735, 0.002117778178151727, -0.013419918738592546, -0.002111017474740545, 0.04535053678056746, 0.0... 
\n", - "8041 [-0.03218957492943251, 0.028780610117463832, -0.013999112314320914, -0.030513035926703357, -0.01266486603162879, 0.030317439554497855, -0.022926690652926397, -0.0016031917601359886, -0.004142451673400604, -0.03453673139589852, -0.010694930607946187, 0.004166901219926291, -0.013370409955466832, -0.0033915015819180835, -0.015969047738003516, 0.00409006012060363, -0.003608054575242374, 0.014627814673174996, -0.0034700893767748435, -0.010387565279332942, -0.017687501845615447, 0.0378618697233920... \n", - "8042 [-0.00844287551711891, 0.007132541505828001, -0.03071855976266079, -0.01537278778329336, -0.017736793561385977, 0.013805790130529455, -0.028557183689882994, -0.011245910460748566, -0.001601613514189059, -0.02146516635560515, 0.006950175516102691, -0.002230607795628108, -0.0012419469979745492, -0.0015281604555600072, -0.035446567593555904, -0.008929185133493915, 0.004346392370974579, -0.004846210820635078, 0.007929549165495446, -0.006663117698229231, -0.0054540976082732015, 0.0074297307158349... \n", - "8043 [-0.024739831002335774, -3.678847031750047e-05, -0.03112341936349941, -0.01898638292832367, 0.014739788661365483, 0.027315185581782614, -0.023493250182726716, 0.008315103830087489, -0.028685053017767102, -0.012438408686702603, -0.005047966586623956, 0.0036575497514291025, -0.0070103037096416665, -0.0141096493055365, -0.015465818849472086, 0.02036995011900216, 0.0037500158778638595, 0.003042821222647049, -0.007657566827515602, -0.018438433718755757, 0.0007418695981489927, -0.00353768621108207... 
\n", - "\n", - "[8018 rows x 3 columns]" - ] + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"gds\",\n \"rows\": 0,\n \"fields\": []\n}" + } }, - "execution_count": 11, "metadata": {}, - "output_type": "execute_result" + "execution_count": 13 } ], + "execution_count": 13, "source": [ - "%%time\n", + "#create vector index\n", "\n", - "count = 0\n", - "embeddings = []\n", - "for docs in gds_db_load.chunks(product_emb_df.text, n=500):\n", - " count += len(docs)\n", - " print(f'Embedded {count} of {product_emb_df.shape[0]}')\n", - " embeddings.extend(embedding_model.embed_documents(docs))\n", + "embedding_dimension = 1536 #default for OpenAI text-embedding-ada-002\n", + "\n", + "gds.run_cypher('''\n", + "CREATE VECTOR INDEX product_text_embeddings IF NOT EXISTS FOR (n:Product) ON (n.textEmbedding)\n", + "OPTIONS {indexConfig: {\n", + " `vector.dimensions`: toInteger($dimension),\n", + " `vector.similarity_function`: 'cosine'\n", + "}}''', params={'dimension': embedding_dimension})\n", + "\n", + "#wait for index to come online\n", + "gds.run_cypher('CALL db.awaitIndex(\"product_text_embeddings\", 300)')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": false, + "id": "rGoMu3bqekeE" + }, + "source": [ + "### Vector Search Using Cypher\n", "\n", - "# Set as column of dataframe to prepare for loading\n", - "product_emb_df['textEmbedding'] = embeddings\n", - "product_emb_df" + "To do vector search, we need to:\n", + "1. Take the search prompt and convert it to an embedding query vector\n", + "2. 
Use similarity search with that new vector to pull semantically similar documents" ] }, { "cell_type": "markdown", "metadata": { "collapsed": false, - "id": "zOudn8tmdyXo" + "id": "SN7zU0GbZACF" }, "source": [ - "#### Create Vector Properties and Index\n", "\n", - "Now, we will load the embeddings into Neo4j by MATCHing on ProductCode, then calling the `db.create.setNodeVectorProperty` to set the embedding property. This special function is used to set the properties as floats rather than double precision, which requires more space. This becomes important as these embedding vectors tend to be long, and the size can add up quickly.\n", "\n", - "After bulk loading, we will create the vector index. The [Neo4j Vector Index](https://neo4j.com/docs/cypher-manual/current/indexes-for-vector-search/) enables efficient Approximate Nearest Neighbor (ANN) search with vectors. It uses the Hierarchical Navigable Small World (HNSW) algorithm." + "Below is an example of converting a search prompt into a query vector. Again we can use Neo4j’s native integrations to call embedding APIs from Cypher. 
We use our same embedding model, OpenAI `text-embedding-ada-002` by default.\n" ] }, { - "cell_type": "code", - "execution_count": 12, "metadata": { + "ExecuteTime": { + "end_time": "2024-06-22T16:23:06.738096Z", + "start_time": "2024-06-22T16:23:06.494744Z" + }, "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 115 }, - "id": "yWtG_wWKdyXo", - "outputId": "a76a919d-f329-454e-cdc9-92a35fc2cf18" + "id": "JrywpfKFdzh4", + "outputId": "2e8dfb37-3029-434e-80ea-5cc4383ed792" }, + "cell_type": "code", + "source": [ + "#search_prompt = 'denim jeans, loose fit, high-waist'\n", + "search_prompt = 'denim jeans'\n", + "#generate embeddings\n", + "gds.run_cypher('''\n", + "RETURN genai.vector.encode($searchPrompt, \"OpenAI\", {token:$token}) AS queryVector\n", + "''', params={'searchPrompt':search_prompt, 'token':OPENAI_API_KEY})" + ], "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "====== loading Product text embeddings ======\n", - "staging 8,018 records\n", - "Set 100 of 8,018 text embeddings\n", - "Set 200 of 8,018 text embeddings\n", - "Set 300 of 8,018 text embeddings\n", - "Set 400 of 8,018 text embeddings\n", - "Set 500 of 8,018 text embeddings\n", - "Set 600 of 8,018 text embeddings\n", - "Set 700 of 8,018 text embeddings\n", - "Set 800 of 8,018 text embeddings\n", - "Set 900 of 8,018 text embeddings\n", - "Set 1,000 of 8,018 text embeddings\n", - "Set 1,100 of 8,018 text embeddings\n", - "Set 1,200 of 8,018 text embeddings\n", - "Set 1,300 of 8,018 text embeddings\n", - "Set 1,400 of 8,018 text embeddings\n", - "Set 1,500 of 8,018 text embeddings\n", - "Set 1,600 of 8,018 text embeddings\n", - "Set 1,700 of 8,018 text embeddings\n", - "Set 1,800 of 8,018 text embeddings\n", - "Set 1,900 of 8,018 text embeddings\n", - "Set 2,000 of 8,018 text embeddings\n", - "Set 2,100 of 8,018 text embeddings\n", - "Set 2,200 of 8,018 text embeddings\n", - "Set 2,300 of 8,018 text embeddings\n", - "Set 2,400 of 8,018 text 
embeddings\n", - "Set 2,500 of 8,018 text embeddings\n", - "Set 2,600 of 8,018 text embeddings\n", - "Set 2,700 of 8,018 text embeddings\n", - "Set 2,800 of 8,018 text embeddings\n", - "Set 2,900 of 8,018 text embeddings\n", - "Set 3,000 of 8,018 text embeddings\n", - "Set 3,100 of 8,018 text embeddings\n", - "Set 3,200 of 8,018 text embeddings\n", - "Set 3,300 of 8,018 text embeddings\n", - "Set 3,400 of 8,018 text embeddings\n", - "Set 3,500 of 8,018 text embeddings\n", - "Set 3,600 of 8,018 text embeddings\n", - "Set 3,700 of 8,018 text embeddings\n", - "Set 3,800 of 8,018 text embeddings\n", - "Set 3,900 of 8,018 text embeddings\n", - "Set 4,000 of 8,018 text embeddings\n", - "Set 4,100 of 8,018 text embeddings\n", - "Set 4,200 of 8,018 text embeddings\n", - "Set 4,300 of 8,018 text embeddings\n", - "Set 4,400 of 8,018 text embeddings\n", - "Set 4,500 of 8,018 text embeddings\n", - "Set 4,600 of 8,018 text embeddings\n", - "Set 4,700 of 8,018 text embeddings\n", - "Set 4,800 of 8,018 text embeddings\n", - "Set 4,900 of 8,018 text embeddings\n", - "Set 5,000 of 8,018 text embeddings\n", - "Set 5,100 of 8,018 text embeddings\n", - "Set 5,200 of 8,018 text embeddings\n", - "Set 5,300 of 8,018 text embeddings\n", - "Set 5,400 of 8,018 text embeddings\n", - "Set 5,500 of 8,018 text embeddings\n", - "Set 5,600 of 8,018 text embeddings\n", - "Set 5,700 of 8,018 text embeddings\n", - "Set 5,800 of 8,018 text embeddings\n", - "Set 5,900 of 8,018 text embeddings\n", - "Set 6,000 of 8,018 text embeddings\n", - "Set 6,100 of 8,018 text embeddings\n", - "Set 6,200 of 8,018 text embeddings\n", - "Set 6,300 of 8,018 text embeddings\n", - "Set 6,400 of 8,018 text embeddings\n", - "Set 6,500 of 8,018 text embeddings\n", - "Set 6,600 of 8,018 text embeddings\n", - "Set 6,700 of 8,018 text embeddings\n", - "Set 6,800 of 8,018 text embeddings\n", - "Set 6,900 of 8,018 text embeddings\n", - "Set 7,000 of 8,018 text embeddings\n", - "Set 7,100 of 8,018 text embeddings\n", - "Set 
7,200 of 8,018 text embeddings\n", - "Set 7,300 of 8,018 text embeddings\n", - "Set 7,400 of 8,018 text embeddings\n", - "Set 7,500 of 8,018 text embeddings\n", - "Set 7,600 of 8,018 text embeddings\n", - "Set 7,700 of 8,018 text embeddings\n", - "Set 7,800 of 8,018 text embeddings\n", - "Set 7,900 of 8,018 text embeddings\n", - "Set 8,000 of 8,018 text embeddings\n", - "Set 8,018 of 8,018 text embeddings\n" - ] - }, - { + "output_type": "execute_result", "data": { + "text/plain": [ + " queryVector\n", + "0 [-0.02218937873840332, -0.012347104959189892, -0.00806528888642788, -0.02785475365817547, -0.028012124821543694, 0.004937530495226383, -0.026674466207623482, -0.020117321982979774, -0.006101423874497414, -0.031579211354255676, 0.0024671258870512247, 0.006199780851602554, 0.0021704151295125484, -0.013691319152712822, -0.029900582507252693, 0.009127546101808548, 0.024890923872590065, -0.013520833104848862, 0.041073959320783615, -0.025140095502138138, -0.016314176842570305, 0.002703183097764849..." + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"''', params={'searchPrompt':search_prompt, 'token':OPENAI_API_KEY})\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"queryVector\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 7 } ], - "source": [ - "#search_prompt = 'denim jeans, loose fit, high-waist'\n", - "search_prompt = 'Oversized Sweaters'\n", - "\n", - "query_vector = embedding_model.embed_query(search_prompt)\n", - "print(f'query vector length: {len(query_vector)}')\n", - "print(f'query vector sample: {query_vector[:10]}')" - ] + "execution_count": 7 }, { - "cell_type": "markdown", "metadata": { - "collapsed": false, - "id": "7yJwN1fxZACG" + "id": "hDTQ3iE0dzh4" }, + "cell_type": "markdown", "source": [ - "Now we can take that and use it in a Cypher query with the vector index to retrieve semantically similar documents." + "To conduct vector search, we simply combine the above query with a call to the vector index to retrieve semantically similar documents. 
" ] }, { - "cell_type": "code", - "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 762 + "height": 519 }, - "id": "pRTY5LnPdyXp", - "outputId": "15725432-f1d9-468e-9419-ade66c0ac9d8" + "id": "T2nck70Wdzh4", + "outputId": "8b0fa555-9401-435d-a9dc-10f0cb520e0a" }, + "cell_type": "code", "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + " productCode \\\n", + "0 252298 \n", + "1 598423 \n", + "2 727804 \n", + "3 652924 \n", + "4 810170 \n", + "5 698387 \n", + "6 749656 \n", + "7 571650 \n", + "8 620223 \n", + "9 522754 \n", + "\n", + " text \\\n", + "0 ##Product\\nName: Didi denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: Jeans in washed, stretch denim with hard-worn details, a regular waist, front and back pockets and skinny legs. \n", + "1 ##Product\\nName: Night Denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: High-waisted jeans in washed stretch denim with a zip fly and button and decorative zips on the waistband. Front and back pockets and skinny legs with a zip down the sides. \n", + "2 ##Product\\nName: Didi HW Skinny denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: Jeans in washed, stretch denim with hard-worn details, a high waist, visible button fly, front and back pockets and skinny legs. \n", + "3 ##Product\\nName: &DENIM Jeggings HW\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: High-waisted jeggings in stretch denim with a zip fly and button, fake front pockets and real back pockets. \n", + "4 ##Product\\nName: Skinny denim (D)\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: Jeans in washed, stretch denim with fake front pockets, real back pockets and skinny legs. Wide ribbing at the waist for best fit over the tummy. 
\n", + "5 ##Product\\nName: &DENIM+ Skinny Shaping HW\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: 5-pocket jeans in washed, stretch denim with a high waist, zip fly and button and skinny legs. Shaping – denim with a stretch function that holds in and shapes the waist, thighs and bum while keeping the jeans in shape. \n", + "6 ##Product\\nName: &DENIM jen bermuda shorts\\nType: Shorts\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: 5-pocket shorts in washed, stretch denim with a regular waist, zip fly and button, and sewn-in turn-ups at the hems. The cotton content of the shorts is partly recycled. \n", + "7 ##Product\\nName: Taylor Fancy Denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: Low-rise, ankle-length jeans in washed stretch denim with front and back pockets, a zip fly and button, and skinny legs with raw-edge hems. \n", + "8 ##Product\\nName: Beat denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: High-waisted, ankle-length denim jeans with a zip fly and button, side pockets, welt back pockets and wide, straight legs with creases. \n", + "9 ##Product\\nName: Snake fancy denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: 5-pocket superskinny-fit jeans in stretch denim with a regular waist, zip fly and skinny legs. \n", + "\n", + " score \n", + "0 0.938433 \n", + "1 0.936743 \n", + "2 0.934681 \n", + "3 0.934484 \n", + "4 0.934032 \n", + "5 0.933894 \n", + "6 0.933860 \n", + "7 0.933407 \n", + "8 0.933102 \n", + "9 0.932913 " + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"''', params={'searchPrompt':search_prompt, 'token':OPENAI_API_KEY})\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"productCode\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 156094,\n \"min\": 252298,\n \"max\": 810170,\n \"num_unique_values\": 10,\n \"samples\": [\n 620223,\n 598423,\n 698387\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"text\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"##Product\\nName: Beat denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: High-waisted, ankle-length denim jeans with a zip fly and button, side pockets, welt back pockets and wide, straight legs with creases.\",\n \"##Product\\nName: Night Denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: High-waisted jeans in washed stretch denim with a zip fly and button and decorative zips on the waistband. Front and back pockets and skinny legs with a zip down the sides.\",\n \"##Product\\nName: &DENIM+ Skinny Shaping HW\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: 5-pocket jeans in washed, stretch denim with a high waist, zip fly and button and skinny legs. 
Shaping \\u2013 denim with a stretch function that holds in and shapes the waist, thighs and bum while keeping the jeans in shape.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0017372145247116352,\n \"min\": 0.932913064956665,\n \"max\": 0.9384325742721558,\n \"num_unique_values\": 10,\n \"samples\": [\n 0.9331021308898926,\n 0.936742901802063,\n 0.9338935613632202\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } }, - "execution_count": 14, "metadata": {}, - "output_type": "execute_result" + "execution_count": 8 } ], + "execution_count": 8, "source": [ + "#generate embeddings\n", "gds.run_cypher('''\n", - "CALL db.index.vector.queryNodes(\"product_text_embeddings\", 10, $queryVector)\n", + "\n", + "//convert search prompt to query vector\n", + "WITH genai.vector.encode(\n", + " $searchPrompt,\n", + " \"OpenAI\",\n", + " {token:$token}) AS queryVector\n", + "\n", + "//find similar products via vector search with index\n", + "CALL db.index.vector.queryNodes(\"product_text_embeddings\", 10, queryVector)\n", "YIELD node AS product, score\n", "RETURN product.productCode AS productCode,\n", " product.text AS text,\n", " score\n", - "''', params={'queryVector': query_vector})" + "''', params={'searchPrompt':search_prompt, 'token':OPENAI_API_KEY})" + ] + }, + { + "metadata": { + "id": "z4UlyNKcdzh5" + }, + "cell_type": "markdown", + "source": [ + "While the above Cypher vector encoding calls are convenient, you have the freedom to generate embeddings externally from any model/API you want. You can then ingest them into Neo4j and use them for vector search.\n", + "Below is an example of generating an embedding externally and sending it as a query parameter, in this case for search, but you can also load embeddings in a similar manner." 
] }, + { + "cell_type": "code", + "metadata": { + "id": "elNIp0_BdyXo", + "ExecuteTime": { + "end_time": "2024-06-22T16:45:42.024437Z", + "start_time": "2024-06-22T16:45:42.021799Z" + } + }, + "source": [ + "## Optional for workshop\n", + "# from langchain_openai import OpenAIEmbeddings\n", + "#\n", + "# embedding_model = OpenAIEmbeddings()\n", + "# embedding_dimension = 1536\n", + "#\n", + "# query_vector = embedding_model.embed_query(search_prompt)\n", + "# print(f'query vector length: {len(query_vector)}')\n", + "# print(f'query vector sample: {query_vector[:10]}')\n", + "# print(f'Result:')\n", + "#\n", + "# gds.run_cypher('''\n", + "# CALL db.index.vector.queryNodes(\"product_text_embeddings\", 10, $queryVector)\n", + "# YIELD node AS product, score\n", + "# RETURN product.productCode AS productCode,\n", + "# product.text AS text,\n", + "# score\n", + "# ''', params={'queryVector': query_vector})" + ], + "outputs": [], + "execution_count": null + }, { "cell_type": "markdown", "metadata": { @@ -1794,18 +2134,22 @@ "source": [ "### Vector Search Using Langchain\n", "\n", - "We can also do this vector search with Langchain, a recommended approach going forward. To do this, we use the Neo4jVector class and call the below method to set up from an existing index in the graph." + "We can also do this vector search with Langchain, a recommended approach as we seek to integrate LLM chains going forward. To do this, we use the `Neo4jVector` class and call the below method to set up from an existing index in the graph." 
] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 9, "metadata": { "id": "ofoi5aJvekeF" }, "outputs": [], "source": [ "from langchain.vectorstores.neo4j_vector import Neo4jVector\n", + "from langchain_openai import OpenAIEmbeddings\n", + "\n", + "embedding_model = OpenAIEmbeddings()\n", + "embedding_dimension = 1536\n", "\n", "kg_vector_search = Neo4jVector.from_existing_index(\n", " embedding=embedding_model,\n", @@ -1827,33 +2171,33 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MDFoMXmbekeF", - "outputId": "ad51a5c6-4402-4f3c-e2d9-3b3354569f6e" + "outputId": "dded9cd4-41a9-4263-b19e-9c20fbc41d4a" }, "outputs": [ { + "output_type": "execute_result", "data": { "text/plain": [ - "[Document(page_content='##Product\\nName: Betsy Oversized\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Oversized, V-neck jumper in a soft, loose knit containing some wool and alpaca wool. Dropped shoulders, long, wide sleeves, wide ribbing around the neckline, cuffs and hem, and slits in the sides.', metadata={'prodName': 'Betsy Oversized', 'garmentGroupName': 'Knitwear', 'garmentGroupNo': 1003, 'productCode': 842001, 'productTypeName': 'Sweater', 'productTypeNo': 252, 'detailDesc': 'Oversized, V-neck jumper in a soft, loose knit containing some wool and alpaca wool. Dropped shoulders, long, wide sleeves, wide ribbing around the neckline, cuffs and hem, and slits in the sides.', 'productGroupName': 'Garment Upper body'}),\n", - " Document(page_content='##Product\\nName: Japp oversize sweater\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Relaxed-fit top in sweatshirt fabric with a ribbed turtle neck, dropped shoulders, long, wide sleeves and ribbing at the cuffs and hem. 
Longer at the back.', metadata={'prodName': 'Japp oversize sweater', 'garmentGroupName': 'Jersey Basic', 'garmentGroupNo': 1002, 'productCode': 817392, 'productTypeName': 'Sweater', 'productTypeNo': 252, 'detailDesc': 'Relaxed-fit top in sweatshirt fabric with a ribbed turtle neck, dropped shoulders, long, wide sleeves and ribbing at the cuffs and hem. Longer at the back.', 'productGroupName': 'Garment Upper body'}),\n", - " Document(page_content='##Product\\nName: DIV Anni oversize hood\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Unknown\\nDescription: Oversized top in sweatshirt fabric with a lined hood with a wrapover front. Kangaroo pocket, dropped shoulders, long sleeves and ribbing at the cuffs and hem. Soft brushed inside.', metadata={'prodName': 'DIV Anni oversize hood', 'garmentGroupName': 'Unknown', 'garmentGroupNo': 1001, 'productCode': 709418, 'productTypeName': 'Sweater', 'productTypeNo': 252, 'detailDesc': 'Oversized top in sweatshirt fabric with a lined hood with a wrapover front. Kangaroo pocket, dropped shoulders, long sleeves and ribbing at the cuffs and hem. Soft brushed inside.', 'productGroupName': 'Garment Upper body'}),\n", - " Document(page_content='##Product\\nName: Runar sweater\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Oversized top in soft sweatshirt fabric. Relaxed fit with low dropped shoulders, extra-long sleeves and ribbing around the neckline, cuffs and hem. Soft brushed inside.', metadata={'prodName': 'Runar sweater', 'garmentGroupName': 'Jersey Basic', 'garmentGroupNo': 1002, 'productCode': 860833, 'productTypeName': 'Sweater', 'productTypeNo': 252, 'detailDesc': 'Oversized top in soft sweatshirt fabric. Relaxed fit with low dropped shoulders, extra-long sleeves and ribbing around the neckline, cuffs and hem. 
Soft brushed inside.', 'productGroupName': 'Garment Upper body'}),\n", - " Document(page_content='##Product\\nName: Sandy\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Oversized jumper in a soft knit containing some wool with dropped shoulders, long sleeves, a rounded hem and ribbing around the neckline, cuffs and hem. Longer at the back. The polyester content of the jumper is recycled.', metadata={'prodName': 'Sandy', 'garmentGroupName': 'Knitwear', 'garmentGroupNo': 1003, 'productCode': 893141, 'productTypeName': 'Sweater', 'productTypeNo': 252, 'detailDesc': 'Oversized jumper in a soft knit containing some wool with dropped shoulders, long sleeves, a rounded hem and ribbing around the neckline, cuffs and hem. Longer at the back. The polyester content of the jumper is recycled.', 'productGroupName': 'Garment Upper body'}),\n", - " Document(page_content='##Product\\nName: Macy\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Oversized jumper in a soft knit containing some wool with a ribbed polo neck, low dropped shoulders, long sleeves, and ribbing at the cuffs and hem. The polyester content of the jumper is recycled.', metadata={'prodName': 'Macy', 'garmentGroupName': 'Knitwear', 'garmentGroupNo': 1003, 'productCode': 812167, 'productTypeName': 'Sweater', 'productTypeNo': 252, 'detailDesc': 'Oversized jumper in a soft knit containing some wool with a ribbed polo neck, low dropped shoulders, long sleeves, and ribbing at the cuffs and hem. 
The polyester content of the jumper is recycled.', 'productGroupName': 'Garment Upper body'}),\n", - " Document(page_content='##Product\\nName: Simba\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Oversized jumper in a soft knit containing some wool with a cowl neck, long raglan sleeves and wide ribbing at the cuffs and hem.', metadata={'prodName': 'Simba', 'garmentGroupName': 'Knitwear', 'garmentGroupNo': 1003, 'productCode': 690623, 'productTypeName': 'Sweater', 'productTypeNo': 252, 'detailDesc': 'Oversized jumper in a soft knit containing some wool with a cowl neck, long raglan sleeves and wide ribbing at the cuffs and hem.', 'productGroupName': 'Garment Upper body'}),\n", - " Document(page_content='##Product\\nName: Happy\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Oversized jumper in a soft cable knit with a polo neck, low dropped shoulders and long, wide sleeves. Straight hem and ribbing at the cuffs and hem. Slightly longer at the back.', metadata={'prodName': 'Happy', 'garmentGroupName': 'Knitwear', 'garmentGroupNo': 1003, 'productCode': 690624, 'productTypeName': 'Sweater', 'productTypeNo': 252, 'detailDesc': 'Oversized jumper in a soft cable knit with a polo neck, low dropped shoulders and long, wide sleeves. Straight hem and ribbing at the cuffs and hem. Slightly longer at the back.', 'productGroupName': 'Garment Upper body'}),\n", - " Document(page_content='##Product\\nName: Petar Sweater(1)\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Oversized top in sturdy sweatshirt fabric with dropped shoulders and ribbing around the neckline, cuffs and hem. 
Soft brushed inside.', metadata={'prodName': 'Petar Sweater(1)', 'garmentGroupName': 'Jersey Basic', 'garmentGroupNo': 1002, 'productCode': 557247, 'productTypeName': 'Sweater', 'productTypeNo': 252, 'detailDesc': 'Oversized top in sturdy sweatshirt fabric with dropped shoulders and ribbing around the neckline, cuffs and hem. Soft brushed inside.', 'productGroupName': 'Garment Upper body'}),\n", - " Document(page_content='##Product\\nName: Sister off shoulder\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Jersey Fancy\\nDescription: Off-the-shoulder top in sweatshirt fabric with long, wide sleeves and ribbing around the top, cuffs and hem. Soft brushed inside.', metadata={'prodName': 'Sister off shoulder', 'garmentGroupName': 'Jersey Fancy', 'garmentGroupNo': 1005, 'productCode': 687934, 'productTypeName': 'Sweater', 'productTypeNo': 252, 'detailDesc': 'Off-the-shoulder top in sweatshirt fabric with long, wide sleeves and ribbing around the top, cuffs and hem. Soft brushed inside.', 'productGroupName': 'Garment Upper body'})]" + "[Document(page_content='##Product\\nName: Didi denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: Jeans in washed, stretch denim with hard-worn details, a regular waist, front and back pockets and skinny legs.', metadata={'prodName': 'Didi denim', 'garmentGroupName': 'Dresses Ladies', 'garmentGroupNo': 1013, 'productCode': 252298, 'productTypeName': 'Trousers', 'productTypeNo': 272, 'detailDesc': 'Jeans in washed, stretch denim with hard-worn details, a regular waist, front and back pockets and skinny legs.', 'productGroupName': 'Garment Lower body', 'url': 'https://representative-domain/product/252298'}),\n", + " Document(page_content='##Product\\nName: Night Denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: High-waisted jeans in washed stretch denim with a zip fly and button and decorative zips on the waistband. 
Front and back pockets and skinny legs with a zip down the sides.', metadata={'prodName': 'Night Denim', 'garmentGroupName': 'Dresses Ladies', 'garmentGroupNo': 1013, 'productCode': 598423, 'productTypeName': 'Trousers', 'productTypeNo': 272, 'detailDesc': 'High-waisted jeans in washed stretch denim with a zip fly and button and decorative zips on the waistband. Front and back pockets and skinny legs with a zip down the sides.', 'productGroupName': 'Garment Lower body', 'url': 'https://representative-domain/product/598423'}),\n", + " Document(page_content='##Product\\nName: Didi HW Skinny denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: Jeans in washed, stretch denim with hard-worn details, a high waist, visible button fly, front and back pockets and skinny legs.', metadata={'prodName': 'Didi HW Skinny denim', 'garmentGroupName': 'Trousers', 'garmentGroupNo': 1009, 'productCode': 727804, 'productTypeName': 'Trousers', 'productTypeNo': 272, 'detailDesc': 'Jeans in washed, stretch denim with hard-worn details, a high waist, visible button fly, front and back pockets and skinny legs.', 'productGroupName': 'Garment Lower body', 'url': 'https://representative-domain/product/727804'}),\n", + " Document(page_content='##Product\\nName: &DENIM Jeggings HW\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: High-waisted jeggings in stretch denim with a zip fly and button, fake front pockets and real back pockets.', metadata={'prodName': '&DENIM Jeggings HW', 'garmentGroupName': 'Trousers Denim', 'garmentGroupNo': 1016, 'productCode': 652924, 'productTypeName': 'Trousers', 'productTypeNo': 272, 'detailDesc': 'High-waisted jeggings in stretch denim with a zip fly and button, fake front pockets and real back pockets.', 'productGroupName': 'Garment Lower body', 'url': 'https://representative-domain/product/652924'}),\n", + " Document(page_content='##Product\\nName: Skinny denim (D)\\nType: 
Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: Jeans in washed, stretch denim with fake front pockets, real back pockets and skinny legs. Wide ribbing at the waist for best fit over the tummy.', metadata={'prodName': 'Skinny denim (D)', 'garmentGroupName': 'Trousers', 'garmentGroupNo': 1009, 'productCode': 810170, 'productTypeName': 'Trousers', 'productTypeNo': 272, 'detailDesc': 'Jeans in washed, stretch denim with fake front pockets, real back pockets and skinny legs. Wide ribbing at the waist for best fit over the tummy.', 'productGroupName': 'Garment Lower body', 'url': 'https://representative-domain/product/810170'}),\n", + " Document(page_content='##Product\\nName: &DENIM+ Skinny Shaping HW\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: 5-pocket jeans in washed, stretch denim with a high waist, zip fly and button and skinny legs. Shaping – denim with a stretch function that holds in and shapes the waist, thighs and bum while keeping the jeans in shape.', metadata={'prodName': '&DENIM+ Skinny Shaping HW', 'garmentGroupName': 'Trousers Denim', 'garmentGroupNo': 1016, 'productCode': 698387, 'productTypeName': 'Trousers', 'productTypeNo': 272, 'detailDesc': '5-pocket jeans in washed, stretch denim with a high waist, zip fly and button and skinny legs. Shaping – denim with a stretch function that holds in and shapes the waist, thighs and bum while keeping the jeans in shape.', 'productGroupName': 'Garment Lower body', 'url': 'https://representative-domain/product/698387'}),\n", + " Document(page_content='##Product\\nName: &DENIM jen bermuda shorts\\nType: Shorts\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: 5-pocket shorts in washed, stretch denim with a regular waist, zip fly and button, and sewn-in turn-ups at the hems. 
The cotton content of the shorts is partly recycled.', metadata={'prodName': '&DENIM jen bermuda shorts', 'garmentGroupName': 'Trousers Denim', 'garmentGroupNo': 1016, 'productCode': 749656, 'productTypeName': 'Shorts', 'productTypeNo': 274, 'detailDesc': '5-pocket shorts in washed, stretch denim with a regular waist, zip fly and button, and sewn-in turn-ups at the hems. The cotton content of the shorts is partly recycled.', 'productGroupName': 'Garment Lower body', 'url': 'https://representative-domain/product/749656'}),\n", + " Document(page_content='##Product\\nName: Taylor Fancy Denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: Low-rise, ankle-length jeans in washed stretch denim with front and back pockets, a zip fly and button, and skinny legs with raw-edge hems.', metadata={'prodName': 'Taylor Fancy Denim', 'garmentGroupName': 'Dresses Ladies', 'garmentGroupNo': 1013, 'productCode': 571650, 'productTypeName': 'Trousers', 'productTypeNo': 272, 'detailDesc': 'Low-rise, ankle-length jeans in washed stretch denim with front and back pockets, a zip fly and button, and skinny legs with raw-edge hems.', 'productGroupName': 'Garment Lower body', 'url': 'https://representative-domain/product/571650'}),\n", + " Document(page_content='##Product\\nName: Beat denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: High-waisted, ankle-length denim jeans with a zip fly and button, side pockets, welt back pockets and wide, straight legs with creases.', metadata={'prodName': 'Beat denim', 'garmentGroupName': 'Trousers', 'garmentGroupNo': 1009, 'productCode': 620223, 'productTypeName': 'Trousers', 'productTypeNo': 272, 'detailDesc': 'High-waisted, ankle-length denim jeans with a zip fly and button, side pockets, welt back pockets and wide, straight legs with creases.', 'productGroupName': 'Garment Lower body', 'url': 'https://representative-domain/product/620223'}),\n", + " 
Document(page_content='##Product\\nName: Snake fancy denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: 5-pocket superskinny-fit jeans in stretch denim with a regular waist, zip fly and skinny legs.', metadata={'prodName': 'Snake fancy denim', 'garmentGroupName': 'Trousers', 'garmentGroupNo': 1009, 'productCode': 522754, 'productTypeName': 'Trousers', 'productTypeNo': 272, 'detailDesc': '5-pocket superskinny-fit jeans in stretch denim with a regular waist, zip fly and skinny legs.', 'productGroupName': 'Garment Lower body', 'url': 'https://representative-domain/product/522754'})]" ] }, - "execution_count": 16, "metadata": {}, - "output_type": "execute_result" + "execution_count": 10 } ], "source": [ @@ -1863,20 +2207,36 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 658 + "height": 449 }, "id": "qQP7e2dcekeF", - "outputId": "610d663e-15a6-4a84-c16d-b8fe16f8283c" + "outputId": "4056afb2-ede2-4dc2-9cec-facd8c7b16cc" }, "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + " document\n", + "0 ##Product\\nName: Didi denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: Jeans in washed, stretch denim with hard-worn details, a regular waist, front and back pockets and skinny legs.\n", + "1 ##Product\\nName: Night Denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: High-waisted jeans in washed stretch denim with a zip fly and button and decorative zips on the waistband. 
Front and back pockets and skinny legs with a zip down the sides.\n", + "2 ##Product\\nName: Didi HW Skinny denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: Jeans in washed, stretch denim with hard-worn details, a high waist, visible button fly, front and back pockets and skinny legs.\n", + "3 ##Product\\nName: &DENIM Jeggings HW\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: High-waisted jeggings in stretch denim with a zip fly and button, fake front pockets and real back pockets.\n", + "4 ##Product\\nName: Skinny denim (D)\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: Jeans in washed, stretch denim with fake front pockets, real back pockets and skinny legs. Wide ribbing at the waist for best fit over the tummy.\n", + "5 ##Product\\nName: &DENIM+ Skinny Shaping HW\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: 5-pocket jeans in washed, stretch denim with a high waist, zip fly and button and skinny legs. Shaping – denim with a stretch function that holds in and shapes the waist, thighs and bum while keeping the jeans in shape.\n", + "6 ##Product\\nName: &DENIM jen bermuda shorts\\nType: Shorts\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: 5-pocket shorts in washed, stretch denim with a regular waist, zip fly and button, and sewn-in turn-ups at the hems. 
The cotton content of the shorts is partly recycled.\n", + "7 ##Product\\nName: Taylor Fancy Denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: Low-rise, ankle-length jeans in washed stretch denim with front and back pockets, a zip fly and button, and skinny legs with raw-edge hems.\n", + "8 ##Product\\nName: Beat denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: High-waisted, ankle-length denim jeans with a zip fly and button, side pockets, welt back pockets and wide, straight legs with creases.\n", + "9 ##Product\\nName: Snake fancy denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: 5-pocket superskinny-fit jeans in stretch denim with a regular waist, zip fly and skinny legs." + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" ], - "text/plain": [ - " document\n", - "0 ##Product\\nName: Betsy Oversized\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Oversized, V-neck jumper in a soft, loose knit containing some wool and alpaca wool. Dropped shoulders, long, wide sleeves, wide ribbing around the neckline, cuffs and hem, and slits in the sides.\n", - "1 ##Product\\nName: Japp oversize sweater\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Relaxed-fit top in sweatshirt fabric with a ribbed turtle neck, dropped shoulders, long, wide sleeves and ribbing at the cuffs and hem. Longer at the back.\n", - "2 ##Product\\nName: DIV Anni oversize hood\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Unknown\\nDescription: Oversized top in sweatshirt fabric with a lined hood with a wrapover front. Kangaroo pocket, dropped shoulders, long sleeves and ribbing at the cuffs and hem. Soft brushed inside.\n", - "3 ##Product\\nName: Runar sweater\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Oversized top in soft sweatshirt fabric. Relaxed fit with low dropped shoulders, extra-long sleeves and ribbing around the neckline, cuffs and hem. Soft brushed inside.\n", - "4 ##Product\\nName: Sandy\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Oversized jumper in a soft knit containing some wool with dropped shoulders, long sleeves, a rounded hem and ribbing around the neckline, cuffs and hem. Longer at the back. The polyester content of the jumper is recycled.\n", - "5 ##Product\\nName: Macy\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Oversized jumper in a soft knit containing some wool with a ribbed polo neck, low dropped shoulders, long sleeves, and ribbing at the cuffs and hem. 
The polyester content of the jumper is recycled.\n", - "6 ##Product\\nName: Simba\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Oversized jumper in a soft knit containing some wool with a cowl neck, long raglan sleeves and wide ribbing at the cuffs and hem.\n", - "7 ##Product\\nName: Happy\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Oversized jumper in a soft cable knit with a polo neck, low dropped shoulders and long, wide sleeves. Straight hem and ribbing at the cuffs and hem. Slightly longer at the back.\n", - "8 ##Product\\nName: Petar Sweater(1)\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Oversized top in sturdy sweatshirt fabric with dropped shoulders and ribbing around the neckline, cuffs and hem. Soft brushed inside.\n", - "9 ##Product\\nName: Sister off shoulder\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Jersey Fancy\\nDescription: Off-the-shoulder top in sweatshirt fabric with long, wide sleeves and ribbing around the top, cuffs and hem. Soft brushed inside." - ] + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"pd\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"##Product\\nName: Beat denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: High-waisted, ankle-length denim jeans with a zip fly and button, side pockets, welt back pockets and wide, straight legs with creases.\",\n \"##Product\\nName: Night Denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: High-waisted jeans in washed stretch denim with a zip fly and button and decorative zips on the waistband. 
Front and back pockets and skinny legs with a zip down the sides.\",\n \"##Product\\nName: &DENIM+ Skinny Shaping HW\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: 5-pocket jeans in washed, stretch denim with a high waist, zip fly and button and skinny legs. Shaping \\u2013 denim with a stretch function that holds in and shapes the waist, thighs and bum while keeping the jeans in shape.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } }, - "execution_count": 17, "metadata": {}, - "output_type": "execute_result" + "execution_count": 21 } ], "source": [ @@ -2001,7 +2560,7 @@ "__Using Graph Patterns to Improve Context in Search & Retrieval__\n", "\n", "Above, we saw how you can use the vector index to find semantic similar products in user searches. This is an extremely powerful tool; however, it is not the end-all be-all. It doesn't consider much of the customer data and isn't very personalized. Furthermore, some search\n", - "prompts, like \"Oversized Sweater,\" are very general and can match a large number of products, many of which won't be relevant to the specific user conducting the search.\n", + "prompts, like \"denim jeans\", are very general and can match a large number of products, many of which won't be relevant to the specific user conducting the search.\n", "\n", "We have a rich knowledge graph full of customer information; let's see how to leverage it to improve search experience." 
] @@ -2113,7 +2672,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 11, "metadata": { "id": "8eKwXKf_ekeF" }, @@ -2153,20 +2712,65 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 909 + "height": 597 }, "id": "rUCSQyrZekeF", - "outputId": "f15d17d1-2fbd-47db-bbd4-b9058e127924" + "outputId": "1697d18b-df37-4203-ad6b-63711419c3bc" }, "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + " productCode \\\n", + "0 670698 \n", + "1 706016 \n", + "2 777038 \n", + "3 448509 \n", + "4 539723 \n", + ".. ... \n", + "10 557241 \n", + "11 252298 \n", + "12 598423 \n", + "13 727804 \n", + "14 652924 \n", + "\n", + " document \\\n", + "0 ##Product\\nName: Rachel HW Denim TRS\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: 5-pocket, ankle-length jeans in washed stretch denim in a relaxed fit with a high waist, zip fly and button and straight legs with cut-off, raw-edge hems. \n", + "1 ##Product\\nName: Jade HW Skinny Denim TRS\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: High-waisted jeans in washed superstretch denim with a zip fly and button, fake front pockets, real back pockets and super-skinny legs. \n", + "2 ##Product\\nName: Bono NW slim denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: 5-pocket, ankle-length jeans in washed slightly stretch denim with a high waist, zip fly and button and tapered legs. \n", + "3 ##Product\\nName: Perrie Slim Mom Denim TRS\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: 5-pocket, ankle-length jeans in washed, sturdy cotton denim with a high waist, button fly and slim, straight legs with raw-edge hems. 
\n", + "4 ##Product\\nName: Jade Denim TRS\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: High-waisted jeans in washed superstretch denim with a zip fly and button, fake front pockets, real back pockets and super-skinny legs. \n", + ".. ... \n", + "10 ##Product\\nName: Vintage Slim H.W\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: 5-pocket ankle-length jeans in washed denim with a high waist, button fly and slim legs with sparkly stones at the front. \n", + "11 ##Product\\nName: Didi denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: Jeans in washed, stretch denim with hard-worn details, a regular waist, front and back pockets and skinny legs. \n", + "12 ##Product\\nName: Night Denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: High-waisted jeans in washed stretch denim with a zip fly and button and decorative zips on the waistband. Front and back pockets and skinny legs with a zip down the sides. \n", + "13 ##Product\\nName: Didi HW Skinny denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: Jeans in washed, stretch denim with hard-worn details, a high waist, visible button fly, front and back pockets and skinny legs. \n", + "14 ##Product\\nName: &DENIM Jeggings HW\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: High-waisted jeggings in stretch denim with a zip fly and button, fake front pockets and real back pockets. \n", + "\n", + " searchScore purchaseScore \n", + "0 0.922637 22 \n", + "1 0.926736 11 \n", + "2 0.926354 8 \n", + "3 0.924594 5 \n", + "4 0.928005 3 \n", + ".. ... ... \n", + "10 0.923155 1 \n", + "11 0.938433 0 \n", + "12 0.936743 0 \n", + "13 0.934681 0 \n", + "14 0.934484 0 \n", + "\n", + "[15 rows x 4 columns]" + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \" 'purchaseScore': d\",\n \"rows\": 15,\n \"fields\": [\n {\n \"column\": \"productCode\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 148368,\n \"min\": 252298,\n \"max\": 788575,\n \"num_unique_values\": 15,\n \"samples\": [\n 779659,\n 252298,\n 670698\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"##Product\\nName: Perrie Fancy Denim TRS\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: 5-pocket, ankle-length jeans in washed denim with decorative V-shaped seams at the top. Slightly looser fit with an extra high waist, zip fly and button and tapered legs.\",\n \"##Product\\nName: Didi denim\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Dresses Ladies\\nDescription: Jeans in washed, stretch denim with hard-worn details, a regular waist, front and back pockets and skinny legs.\",\n \"##Product\\nName: Rachel HW Denim TRS\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: 5-pocket, ankle-length jeans in washed stretch denim in a relaxed fit with a high waist, zip fly and button and straight legs with cut-off, raw-edge hems.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"searchScore\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.005176947161549824,\n \"min\": 0.9226365089416504,\n \"max\": 0.9384325742721558,\n \"num_unique_values\": 15,\n \"samples\": [\n 0.9235161542892456,\n 0.9384325742721558,\n 0.9226365089416504\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"purchaseScore\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5,\n \"min\": 0,\n \"max\": 22,\n \"num_unique_values\": 8,\n \"samples\": [\n 11,\n 
2,\n 22\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } }, - "execution_count": 19, "metadata": {}, - "output_type": "execute_result" + "execution_count": 12 } ], "source": [ @@ -2366,43 +3140,52 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PtGT6HfgekeG", - "outputId": "5dc1926a-10ae-428c-9c94-40dd8185d439" + "outputId": "52052aa4-ff09-41f8-d281-611badb07360" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stderr", + "text": [ + "WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. ('schema' returned by 'gds.graph.list' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.graph.list()'\n", + "WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated procedure. ('gds.graph.writeNodeProperties' has been replaced by 'gds.graph.nodeProperties.write')} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.graph.writeNodeProperties($graph_name, $properties, $entities, $config)'\n", + "WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated field from a procedure. 
('schema' returned by 'gds.graph.drop' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: 'CALL gds.graph.drop($graph_name, $failIfMissing)'\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", "text": [ - "CPU times: user 203 ms, sys: 27.7 ms, total: 230 ms\n", - "Wall time: 1min 10s\n" + "CPU times: user 116 ms, sys: 15.3 ms, total: 131 ms\n", + "Wall time: 8.82 s\n" ] }, { + "output_type": "execute_result", "data": { "text/plain": [ "ranIterations 6\n", "didConverge True\n", "nodePairsConsidered 6968692\n", "preProcessingMillis 0\n", - "computeMillis 14699\n", + "computeMillis 2948\n", " ... \n", "postProcessingMillis 0\n", "nodesCompared 13296\n", "relationshipsWritten 127833\n", "similarityDistribution {'min': 0.7500038146972656, 'p5': 0.8663902282714844, 'max': 1.0000038146972656, 'p99': 1.0000038146972656, 'p1': 0.7741241455078125, 'p10': 0.9144134521484375, 'p90': 1.0000038146972656, 'p50': 0.9999923706054688, 'p25': 0.9888267517089844, 'p75': 0.9999961853027344, 'p95': 1.0000038146972656, 'mean': 0.9787080660669286, 'p100': 1.0000038146972656, 'stdDev': 0.04805960041168029}\n", - "configuration {'writeProperty': 'score', 'writeRelationshipType': 'CUSTOMERS_ALSO_LIKE', 'randomSeed': 7474, 'jobId': 'db658f4d-51f0-4055-88ff-768c0497763b', 'deltaThreshold': 0.001, 'topK': 10, 'similarityCutoff': 0.75, 'perturbationRate': 0.0, 'sudo': False, 'maxIterations': 100, 'writeConcurrency': 1, 'sampleRate': 1.0, 'initialSampler': 'RANDOMWALK', 'nodeProperties': {'embedding': 'COSINE'}, 'logProgress': True, 'nodeLabels': ['Article'], 'randomJoins': 10, 'concurrency': 1, 'relationshipTypes': ['*']}\n", + "configuration {'writeProperty': 'score', 'writeRelationshipType': 'CUSTOMERS_ALSO_LIKE', 'randomSeed': 7474, 'jobId': 'b7baf3ab-6575-4cb0-ac07-bfa035385575', 'deltaThreshold': 0.001, 'topK': 10, 'similarityCutoff': 0.75, 'perturbationRate': 0.0, 'sudo': False, 'maxIterations': 100, 'writeConcurrency': 1, 'sampleRate': 1.0, 'initialSampler': 
'RANDOMWALK', 'nodeProperties': {'embedding': 'COSINE'}, 'logProgress': True, 'nodeLabels': ['Article'], 'randomJoins': 10, 'concurrency': 1, 'relationshipTypes': ['*']}\n", "Name: 0, Length: 11, dtype: object" ] }, - "execution_count": 20, "metadata": {}, - "output_type": "execute_result" + "execution_count": 24 } ], "source": [ @@ -2458,7 +3241,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2699,20 +3482,65 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 19, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 597 }, "id": "NVy9J6T5ekeH", - "outputId": "ec61f291-e38e-4836-966a-b0d51f7a5116" + "outputId": "c02e33f3-0b65-40a8-bf28-2d90e30bb410" }, "outputs": [ { + "output_type": "execute_result", "data": { + "text/plain": [ + " productCode prodName productType \\\n", + "0 569974 DONT USE ROLAND HOOD Hoodie \n", + "1 656401 PASTRY SWEATER Sweater \n", + "2 682848 Skinny RW Ankle Milo Zip Trousers \n", + "3 660519 Haven back detail Bra \n", + "4 642498 Bubble Bum Bandeau (1) Bikini top \n", + ".. ... ... ... \n", + "10 620425 Karin headband Hairband \n", + "11 662328 Survivor Blouse \n", + "12 753724 Rosemary Dress \n", + "13 557247 Petar Sweater(1) Sweater \n", + "14 731142 Lead Superskinny Trousers \n", + "\n", + " document \\\n", + "0 ##Product\\nName: DONT USE ROLAND HOOD\\nType: Hoodie\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Top in sweatshirt fabric with a lined drawstring hood, kangaroo pocket, long raglan sleeves and ribbing at the cuffs and hem. \n", + "1 ##Product\\nName: PASTRY SWEATER\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Knitwear\\nDescription: Jumper in soft, textured-knit cotton with long raglan sleeves and ribbing around the neckline, cuffs and hem. 
\n", + "2 ##Product\\nName: Skinny RW Ankle Milo Zip\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers Denim\\nDescription: 5-pocket, ankle-length jeans in washed stretch denim with hard-worn details, a regular waist, zip fly and button, and skinny legs with a zip at the hems. The jeans are made partly from recycled cotton. \n", + "3 ##Product\\nName: Haven back detail\\nType: Bra\\nGroup: Underwear\\nGarment Type: Under-, Nightwear\\nDescription: Push-up bra in lace and mesh with underwired, moulded, padded cups for a larger bust and fuller cleavage. Lace racer back, narrow adjustable shoulder straps, a wide mesh strap at the back and a metal fastener at the front. \n", + "4 ##Product\\nName: Bubble Bum Bandeau (1)\\nType: Bikini top\\nGroup: Swimwear\\nGarment Type: Swimwear\\nDescription: Fully lined bandeau bikini top with padded cups and removable inserts. Detachable ties at the back of the neck, ties at the back, side support and a silicone trim at the top. \n", + ".. ... \n", + "10 ##Product\\nName: Karin headband\\nType: Hairband\\nGroup: Accessories\\nGarment Type: Accessories\\nDescription: Wide hairband in cotton jersey with a twisted detail. \n", + "11 ##Product\\nName: Survivor\\nType: Blouse\\nGroup: Garment Upper body\\nGarment Type: Blouses\\nDescription: Straight-cut blouse in a crêpe weave with a collar, concealed buttons down the front and fake flap front pockets. Yoke with a pleat at the back, long sleeves with pleats and buttoned cuffs, and a straight cut hem with slits in the sides. \n", + "12 ##Product\\nName: Rosemary\\nType: Dress\\nGroup: Garment Full body\\nGarment Type: Dresses Ladies\\nDescription: Short dress in woven fabric with 3/4-length sleeves with an opening and ties at the cuffs, and a gently rounded hem. Unlined. 
\n", + "13 ##Product\\nName: Petar Sweater(1)\\nType: Sweater\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Oversized top in sturdy sweatshirt fabric with dropped shoulders and ribbing around the neckline, cuffs and hem. Soft brushed inside. \n", + "14 ##Product\\nName: Lead Superskinny\\nType: Trousers\\nGroup: Garment Lower body\\nGarment Type: Trousers\\nDescription: Chinos in stretch twill with a zip fly and button, side pockets, welt back pockets and skinny legs. \n", + "\n", + " recommenderScore \n", + "0 28.999712 \n", + "1 16.999836 \n", + "2 14.999846 \n", + "3 12.999863 \n", + "4 11.999878 \n", + ".. ... \n", + "10 8.999906 \n", + "11 8.999906 \n", + "12 6.999929 \n", + "13 6.999927 \n", + "14 6.999925 \n", + "\n", + "[15 rows x 5 columns]" + ], "text/html": [ - "
\n", + "\n", + "
\n", + "
\n", "\n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "summary": "{\n \"name\": \"pd\",\n \"rows\": 15,\n \"fields\": [\n {\n \"column\": \"productCode\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 118937,\n \"min\": 244267,\n \"max\": 753724,\n \"num_unique_values\": 15,\n \"samples\": [\n 511924,\n 662328,\n 569974\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"prodName\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"Leona Push Mirny\",\n \"Survivor\",\n \"DONT USE ROLAND HOOD\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"productType\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 10,\n \"samples\": [\n \"Blouse\",\n \"Sweater\",\n \"T-shirt\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"document\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 15,\n \"samples\": [\n \"##Product\\nName: Leona Push Mirny\\nType: Bra\\nGroup: Underwear\\nGarment Type: Under-, Nightwear\\nDescription: Push-up bra in lace and mesh with underwired, moulded, padded cups for a larger bust and fuller cleavage. Lace racer back, narrow adjustable shoulder straps, a wide mesh strap at the back and metal fastener at the front.\",\n \"##Product\\nName: Survivor\\nType: Blouse\\nGroup: Garment Upper body\\nGarment Type: Blouses\\nDescription: Straight-cut blouse in a cr\\u00eape weave with a collar, concealed buttons down the front and fake flap front pockets. 
Yoke with a pleat at the back, long sleeves with pleats and buttoned cuffs, and a straight cut hem with slits in the sides.\",\n \"##Product\\nName: DONT USE ROLAND HOOD\\nType: Hoodie\\nGroup: Garment Upper body\\nGarment Type: Jersey Basic\\nDescription: Top in sweatshirt fabric with a lined drawstring hood, kangaroo pocket, long raglan sleeves and ribbing at the cuffs and hem.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"recommenderScore\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.553152121233309,\n \"min\": 6.999925434589386,\n \"max\": 28.99971240758896,\n \"num_unique_values\": 15,\n \"samples\": [\n 8.99990850687027,\n 8.99990564584732,\n 28.99971240758896\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } }, - "execution_count": 22, "metadata": {}, - "output_type": "execute_result" + "execution_count": 19 } ], "source": [ @@ -2916,14 +3914,14 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 13, "metadata": { "id": "JI9LVEdKekeH" }, "outputs": [], "source": [ "# Import relevant libraries\n", - "from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate\n", + "from langchain.prompts import PromptTemplate\n", "from langchain_openai import ChatOpenAI\n", "from langchain.schema import StrOutputParser\n", "\n", @@ -2947,7 +3945,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 14, "metadata": { "id": "WLBBVRXwdyXq" }, @@ -2968,9 +3966,9 @@ " OPTIONAL MATCH(product)<-[:VARIANT_OF]-(:Article)<-[:PURCHASED]-(:Customer)\n", " -[:PURCHASED]->(a:Article)<-[:PURCHASED]-(:Customer {{customerId: '{customer_id}'}})\n", " WITH count(a) AS purchaseScore, product, searchScore\n", - " RETURN product.text + '\\nurl: ' + 'https://representative-domain/product/' + product.productCode AS text,\n", + " RETURN product.text + '\\nurl: ' + product.url AS text,\n", " (1.0+purchaseScore)*searchScore AS 
score,\n", - " {{source: 'https://representative-domain/product/' + product.productCode}} AS metadata\n", + " {{source: product.url }} AS metadata\n", " ORDER BY purchaseScore DESC, searchScore DESC LIMIT 5\n", "\n", " \"\"\"\n", @@ -2981,7 +3979,7 @@ " res = kg.query(\"\"\"\n", " MATCH(:Customer {customerId:$customerId})-[:PURCHASED]->(:Article)\n", " -[r:CUSTOMERS_ALSO_LIKE]->(:Article)-[:VARIANT_OF]->(product)\n", - " RETURN product.text + '\\nurl: ' + 'https://representative-domain/product/' + product.productCode AS text,\n", + " RETURN product.text + '\\nurl: ' + product.url AS text,\n", " sum(r.score) AS recommenderScore\n", " ORDER BY recommenderScore DESC LIMIT $k\n", " \"\"\", params={'customerId': customer_id, 'k':k})\n", @@ -2998,43 +3996,44 @@ "source": [ "### Prompt Engineering\n", "\n", - "Now, let's define our prompts. We will combine two:\n", - "1. A system prompt which, in this case, tells the LLM how to generate the message\n", - "2. A human prompt that just wraps the customer search(es)/interest(s)\n", - "\n", - "This will allow us to pass the customer interest(s) to the retriever but then also to the LLM for additional context when drafting the message.\n" + "Now let's define our prompt. We will accept multiple parameters and provide detailed instructions to the LLM to condition the response based on retrieved data, customer interests, and time of year.\n" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 21, "metadata": { "id": "aUAROR6aekeI" }, "outputs": [], "source": [ - "general_system_template = '''\n", - "You are a personal assistant named Sally for a fashion, home, and beauty company called HRM.\n", - "write an email to {customerName}, one of your customers, to promote and summarize products relevant for them given the current season / time of year: {timeOfYear} .\n", - "Please only mention the products listed below. 
Do not come up with or add any new products to the list.\n", - "Each product comes with an https `url` field. Make sure to provide that https url with descriptive name text in markdown for each product.\n", + "prompt = PromptTemplate.from_template('You are a personal assistant named Sally '\n", + "'for a fashion, home, and beauty company called HRM.'\n", + "'write an engaging email to {customerName}, one of your customers, '\n", + "'to promote and summarize products relevant for them given: '\n", + "'- The current season / time of year: {timeOfYear}'\n", + "'- Recent searches/interests: {customerInterests}'\n", + "'Please only mention the products listed below. '\n", + "'Do not come up with or add any new products to the list.'\n", + "'Each product comes with an https `url` field. '\n", + "'Make sure to provide that https url with descriptive name text '\n", + "'in markdown for each product.'\n", + "'''\n", "\n", - "---\n", - "# Relevant Products:\n", + "# RelevantProducts:\n", + "These are products from the HRM store the customer may be interested in based\n", + "on their recent searches/interests: {customerInterests}\n", "{searchProds}\n", "\n", "# Customer May Also Be Interested In the following\n", - " (pick items from here that pair with the above products well for the current season / time of year: {timeOfYear}.\n", - " prioritize those higher in the list if possible):\n", + "The below candidates are recommended based on the shared purchase patterns of\n", + "other customers in the HRM database.\n", + "Select the best 4 to 5 product subset from the context that best match the\n", + "time of year: {timeOfYear} and to pair with the RelevantProducts above.\n", + "For example, even if scarfs are listed here, they may not be appropriate for a\n", + "summer time of year so best not to include those.\n", "{recProds}\n", - "---\n", - "'''\n", - "general_user_template = \"{searchPrompt}\"\n", - "messages = [\n", - " 
SystemMessagePromptTemplate.from_template(general_system_template),\n", - " HumanMessagePromptTemplate.from_template(general_user_template),\n", - "]\n", - "prompt = ChatPromptTemplate.from_messages(messages)" + "''')" ] }, { @@ -3046,7 +4045,7 @@ "source": [ "### Create a Chain\n", "\n", - "Now let's put a chain together that will leverage the retrievers, prompts, and LLM model. This is where Langchain shines, putting RAG together in a simple way.\n", + "Now let's put a chain together that will leverage the retrievers, prompt, and LLM model. This is where Langchain shines, putting RAG together in a simple way.\n", "\n", "In addition to the personalized search and recommendations context, we will allow for some other parameters.\n", "\n", @@ -3058,7 +4057,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 22, "metadata": { "id": "nUpih07QdyXr" }, @@ -3072,11 +4071,14 @@ "\n", "# LLM chain\n", "def chain_gen(customer_id):\n", - " return ({'searchProds': (lambda x:x['searchPrompt']) | kg_personalized_search_gen(customer_id).as_retriever(search_kwargs={\"k\": 100}) | format_docs,\n", - " 'recProds': (lambda x:customer_id) | RunnableLambda(kg_recommendations_app),\n", + " return ({'searchProds': (lambda x:x['customerInterests'])\n", + " | kg_personalized_search_gen(customer_id).as_retriever(search_kwargs={\"k\": 100})\n", + " | format_docs,\n", + " 'recProds': (lambda x:customer_id)\n", + " | RunnableLambda(kg_recommendations_app),\n", " 'customerName': lambda x:x['customerName'],\n", " 'timeOfYear': lambda x:x['timeOfYear'],\n", - " \"searchPrompt\": lambda x:x['searchPrompt']}\n", + " \"customerInterests\": lambda x:x['customerInterests']}\n", " | prompt\n", " | llm\n", " | StrOutputParser())" @@ -3094,7 +4096,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 23, "metadata": { "id": "lkBdqOVjekeI" }, @@ -3105,54 +4107,76 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 24, "metadata": { 
"colab": { "base_uri": "https://localhost:8080/" }, "id": "jL6P3IoydyXr", - "outputId": "c6260e6f-982a-4f74-b8a3-64c09476abae" + "outputId": "a810cc90-0e50-415b-bca7-4a388510d1f8" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Subject: Stay Warm and Stylish with HRM's Seasonal Sweaters and More!\n", + "Subject: Discover Your Perfect Denim and Summer Essentials at HRM!\n", + "\n", + "Dear Alex,\n", + "\n", + "I hope this email finds you well and enjoying the start of summer! As the days get warmer, it's the perfect time to refresh your wardrobe with some stylish and comfortable pieces. Based on your recent interest in denim jeans, I’ve curated a selection of our top denim products along with some summer essentials that I think you'll love.\n", "\n", - "Dear Alex Smith,\n", + "### Denim Delights\n", "\n", - "As we navigate through the chilly month of February, HRM has curated a selection of cozy and stylish products to keep you warm and fashionable. Here are some of our top picks for you:\n", + "1. **[Rachel HW Denim TRS](https://representative-domain/product/670698)**\n", + " - 5-pocket, ankle-length jeans in washed stretch denim with a relaxed fit, high waist, and straight legs with cut-off, raw-edge hems.\n", "\n", - "1. [Queen Sweater](https://representative-domain/product/677930): This lightweight sweatshirt fabric top with ribbing around the neckline, cuffs, and hem is perfect for layering.\n", + "2. **[Jade HW Skinny Denim TRS](https://representative-domain/product/706016)**\n", + " - High-waisted jeans in washed superstretch denim with a zip fly and button, fake front pockets, real back pockets, and super-skinny legs.\n", "\n", - "2. [Jess Oversize LS](https://representative-domain/product/516712): An oversized top in soft jersey made from a cotton blend with dropped shoulders and long sleeves. Ideal for a relaxed, comfortable look.\n", + "3. 
**[Bono NW Slim Denim](https://representative-domain/product/777038)**\n", + " - 5-pocket, ankle-length jeans in washed slightly stretch denim with a high waist, zip fly and button, and tapered legs.\n", "\n", - "3. [Petar Sweater(1)](https://representative-domain/product/557247): This oversized top in sturdy sweatshirt fabric with dropped shoulders and ribbing around the neckline, cuffs, and hem has a soft brushed inside for extra warmth.\n", + "4. **[Perrie Slim Mom Denim TRS](https://representative-domain/product/448509)**\n", + " - 5-pocket, ankle-length jeans in washed, sturdy cotton denim with a high waist, button fly, and slim, straight legs with raw-edge hems.\n", "\n", - "4. [Family Crew Ladies](https://representative-domain/product/686265): A top in sweatshirt fabric with dropped shoulders, long sleeves, and ribbing around the neckline, cuffs, and hem. The soft brushed inside makes it a cozy choice.\n", + "5. **[Jade Denim TRS](https://representative-domain/product/539723)**\n", + " - High-waisted jeans in washed superstretch denim with a zip fly and button, fake front pockets, real back pockets, and super-skinny legs.\n", "\n", - "5. [Irma Sweater](https://representative-domain/product/669682): This top in printed sweatshirt fabric with dropped shoulders, long sleeves, and ribbing around the neckline, cuffs, and hem adds a pop of pattern to your winter wardrobe.\n", + "### Summer Essentials\n", "\n", - "To complete your winter look, we recommend pairing these sweaters with the following items:\n", + "To complement your new denim, here are some summer must-haves that are perfect for the season:\n", "\n", - "1. [Skinny RW Ankle Milo Zip Trousers](https://representative-domain/product/682848): These ankle-length jeans in washed stretch denim with hard-worn details and a zip at the hems are perfect for a casual day out.\n", + "1. 
**[Dixie Tee](https://representative-domain/product/598806)**\n", + " - Short top in soft cotton jersey with short sleeves and contrasting color trims around the neckline and sleeves.\n", "\n", - "2. [Karin Headband](https://representative-domain/product/620425): This wide hairband in cotton jersey with a twisted detail can add a touch of style to your outfit while keeping your hair in place.\n", + "2. **[Rylee Flatform](https://representative-domain/product/606711)**\n", + " - Sandals with imitation suede straps, an elastic heel strap, and wedge heels. Satin insoles and thermoplastic rubber (TPR) soles. Platform front 2 cm, heel 6 cm.\n", "\n", - "3. [Pastry Sweater](https://representative-domain/product/656401): This jumper in soft, textured-knit cotton with long raglan sleeves and ribbing around the neckline, cuffs, and hem can be layered under your sweaters for extra warmth.\n", + "3. **[Bubble Bum Bandeau](https://representative-domain/product/642498)**\n", + " - Fully lined bandeau bikini top with padded cups and removable inserts. Detachable ties at the back of the neck, ties at the back, side support, and a silicone trim at the top.\n", "\n", - "Stay warm and stylish with HRM this winter. We look forward to serving you soon!\n", + "4. **[Gwen Jersey Top](https://representative-domain/product/671852)**\n", + " - Fitted top in stretch jersey with a slight sheen. V-neck with a lace trim at the top and adjustable spaghetti straps.\n", "\n", - "Best Regards,\n", + "5. **[Rosemary Dress](https://representative-domain/product/753724)**\n", + " - Short dress in woven fabric with 3/4-length sleeves with an opening and ties at the cuffs, and a gently rounded hem. Unlined.\n", "\n", - "Sally\n", - "HRM Personal Assistant\n" + "These pieces are perfect for creating versatile and stylish outfits that will keep you comfortable and chic all summer long. \n", + "\n", + "Feel free to explore these products and more on our website. 
If you have any questions or need further assistance, don't hesitate to reach out. Happy shopping!\n", + "\n", + "Best regards,\n", + "\n", + "Sally \n", + "Personal Assistant \n", + "HRM Fashion, Home, and Beauty \n", + "\n" ] } ], "source": [ - "print(chain.invoke({'searchPrompt':search_prompt, 'customerName':'Alex Smith', 'timeOfYear':'Feb, 2024'}))" + "print(chain.invoke({'customerInterests':search_prompt, 'customerName':'Alex Smith', 'timeOfYear':'Jun, 2024'}))" ] }, { @@ -3167,71 +4191,72 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 26, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Z7-yDDUaD6FD", - "outputId": "686d5f56-2cb8-49bc-8e29-6eb47cb40762" + "outputId": "e42c7ee6-e60d-401e-b9d5-c4ee3b42dd5b" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "=== Prompt to send to LLM ===\n", - " System: \n", - "You are a personal assistant named Sally for a fashion, home, and beauty company called HRM.\n", - "write an email to Alex Smith, one of your customers, to promote and summarize products relevant for them given the current season / time of year: Feb, 2024 .\n", - "Please only mention the products listed below. Do not come up with or add any new products to the list.\n", - "Each product comes with an https `url` field. Make sure to provide that https url with descriptive name text in markdown for each product.\n", - "\n", - "---\n", - "# Relevant Products:\n", + " You are a personal assistant named Sally for a fashion, home, and beauty company called HRM.write an engaging email to Alex Smith, one of your customers, to promote and summarize products relevant for them given: - The current season / time of year: Feb, 2024- Recent searches/interests: denim jeansPlease only mention the products listed below. Do not come up with or add any new products to the list.Each product comes with an https `url` field. 
Make sure to provide that https url with descriptive name text in markdown for each product.\n", + "\n", + "# RelevantProducts:\n", + "These are products from the HRM store the customer may be interested in based \n", + "on their recent searches/interests: denim jeans\n", "##Product\n", - "Name: Queen Sweater\n", - "Type: Sweater\n", - "Group: Garment Upper body\n", - "Garment Type: Jersey Basic\n", - "Description: Top in lightweight sweatshirt fabric with ribbing around the neckline, cuffs and hem.\n", - "url: https://representative-domain/product/677930\n", + "Name: Rachel HW Denim TRS\n", + "Type: Trousers\n", + "Group: Garment Lower body\n", + "Garment Type: Trousers\n", + "Description: 5-pocket, ankle-length jeans in washed stretch denim in a relaxed fit with a high waist, zip fly and button and straight legs with cut-off, raw-edge hems.\n", + "url: https://representative-domain/product/670698\n", "\n", "##Product\n", - "Name: Jess oversize LS\n", - "Type: Top\n", - "Group: Garment Upper body\n", - "Garment Type: Jersey Basic\n", - "Description: Oversized top in soft jersey made from a cotton blend with dropped shoulders and long sleeves.\n", - "url: https://representative-domain/product/516712\n", + "Name: Jade HW Skinny Denim TRS\n", + "Type: Trousers\n", + "Group: Garment Lower body\n", + "Garment Type: Trousers\n", + "Description: High-waisted jeans in washed superstretch denim with a zip fly and button, fake front pockets, real back pockets and super-skinny legs.\n", + "url: https://representative-domain/product/706016\n", "\n", "##Product\n", - "Name: Petar Sweater(1)\n", - "Type: Sweater\n", - "Group: Garment Upper body\n", - "Garment Type: Jersey Basic\n", - "Description: Oversized top in sturdy sweatshirt fabric with dropped shoulders and ribbing around the neckline, cuffs and hem. 
Soft brushed inside.\n", - "url: https://representative-domain/product/557247\n", + "Name: Bono NW slim denim\n", + "Type: Trousers\n", + "Group: Garment Lower body\n", + "Garment Type: Trousers\n", + "Description: 5-pocket, ankle-length jeans in washed slightly stretch denim with a high waist, zip fly and button and tapered legs.\n", + "url: https://representative-domain/product/777038\n", "\n", "##Product\n", - "Name: Family Crew Ladies\n", - "Type: Sweater\n", - "Group: Garment Upper body\n", - "Garment Type: Special Offers\n", - "Description: Top in sweatshirt fabric with dropped shoulders, long sleeves and ribbing around the neckline, cuffs and hem. Soft brushed inside.\n", - "url: https://representative-domain/product/686265\n", + "Name: Perrie Slim Mom Denim TRS\n", + "Type: Trousers\n", + "Group: Garment Lower body\n", + "Garment Type: Trousers\n", + "Description: 5-pocket, ankle-length jeans in washed, sturdy cotton denim with a high waist, button fly and slim, straight legs with raw-edge hems.\n", + "url: https://representative-domain/product/448509\n", "\n", "##Product\n", - "Name: Irma sweater\n", - "Type: Sweater\n", - "Group: Garment Upper body\n", - "Garment Type: Jersey Fancy\n", - "Description: Top in printed sweatshirt fabric with dropped shoulders, long sleeves and ribbing around the neckline, cuffs and hem.\n", - "url: https://representative-domain/product/669682\n", + "Name: Jade Denim TRS\n", + "Type: Trousers\n", + "Group: Garment Lower body\n", + "Garment Type: Trousers\n", + "Description: High-waisted jeans in washed superstretch denim with a zip fly and button, fake front pockets, real back pockets and super-skinny legs.\n", + "url: https://representative-domain/product/539723\n", "\n", "# Customer May Also Be Interested In the following\n", - " (pick items from here that pair with the above products well for the current season / time of year: Feb, 2024.\n", - " prioritize those higher in the list if possible):\n", + "The below candidates 
are recommended based on the shared purchase patterns of \n", + "other customers in the HRM database.\n", + "Select the best 4 to 5 product subset from the context that best match the \n", + "time of year: Feb, 2024 and to pair with the RelevantProducts above. \n", + "For example, even if scarfs are listed here, they may not be appropriate for a \n", + "summer time of year so best not to include those.\n", "##Product\n", "Name: DONT USE ROLAND HOOD\n", "Type: Hoodie\n", @@ -3471,9 +4496,7 @@ "Garment Type: Jersey Fancy\n", "Description: Polo shirt in cotton piqué with a ribbed collar, button placket, short sleeves with ribbed trims, and slits in the sides.\n", "url: https://representative-domain/product/816759\n", - "---\n", "\n", - "Human: Oversized Sweaters\n", " === End Prompt ===\n", " \n" ] @@ -3487,17 +4510,17 @@ " '''\n", "\n", "def chain_print_prompt(customer_id):\n", - " return ({'searchProds': (lambda x:x['searchPrompt']) | kg_personalized_search_gen(customer_id).as_retriever(search_kwargs={\"k\": 100}) | format_docs,\n", + " return ({'searchProds': (lambda x:x['customerInterests']) | kg_personalized_search_gen(customer_id).as_retriever(search_kwargs={\"k\": 100}) | format_docs,\n", " 'recProds': (lambda x:customer_id) | RunnableLambda(kg_recommendations_app),\n", " 'customerName': lambda x:x['customerName'],\n", " 'timeOfYear': lambda x:x['timeOfYear'],\n", - " \"searchPrompt\": lambda x:x['searchPrompt']}\n", + " \"customerInterests\": lambda x:x['customerInterests']}\n", " | prompt\n", " | format_final_prompt\n", " | StrOutputParser())\n", "\n", "print( chain_print_prompt(CUSTOMER_ID)\\\n", - " .invoke({'searchPrompt':search_prompt, 'customerName':'Alex Smith', 'timeOfYear':'Feb, 2024'}))" + " .invoke({'customerInterests':search_prompt, 'customerName':'Alex Smith', 'timeOfYear':'Feb, 2024'}))" ] }, { @@ -3512,13 +4535,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": { - "id": "qeOts3Q4ZACL" + "id": 
"qeOts3Q4ZACL", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5dc2fed0-03a7-47be-8c30-e7590f08d084" }, - "outputs": [], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Subject: Discover Your Perfect Summer Style with HRM!\n", + "\n", + "Dear Alex,\n", + "\n", + "I hope this email finds you well and enjoying the summer season! As July unfolds, it's the perfect time to refresh your wardrobe with some stylish and versatile pieces. Based on your recent interest in western boots, we've curated a selection of products that we think you'll love. \n", + "\n", + "### Step Up Your Style with These Boots\n", + "\n", + "1. **[Harry Hiking Boot](https://representative-domain/product/817484)**\n", + " - Boots in sturdy cotton canvas with an ankle-height shaft, lacing at the front, and a loop at the back. Chunky, patterned soles with a platform front of 4.5 cm and a heel of 6 cm.\n", + "\n", + "2. **[Patsy Platform](https://representative-domain/product/752857)**\n", + " - Platform boots in imitation leather with a zip on one side, lacing at the front, and a loop at the back. Decorative welt seams and chunky soles with a platform front of 4 cm and a heel of 5 cm.\n", + "\n", + "### Pair Your Boots with Trendy Denim\n", + "\n", + "1. **[Bono NW Slim Denim](https://representative-domain/product/777038)**\n", + " - 5-pocket, ankle-length jeans in washed slightly stretch denim with a high waist, zip fly, button, and tapered legs.\n", + "\n", + "2. **[Vintage Ankle H.W](https://representative-domain/product/426013)**\n", + " - 5-pocket ankle-length jeans in washed denim with worn details, a high waist, low crotch, button fly, and straight legs.\n", + "\n", + "3. 
**[Vintage Slim H.W](https://representative-domain/product/557241)**\n", + " - 5-pocket ankle-length jeans in washed denim with a high waist, button fly, and slim legs adorned with sparkly stones at the front.\n", + "\n", + "### Summer Essentials to Complete Your Look\n", + "\n", + "1. **[Dixie Tee](https://representative-domain/product/598806)**\n", + " - Short top in soft cotton jersey with short sleeves and contrasting color trims around the neckline and sleeves.\n", + "\n", + "2. **[Rylee Flatform](https://representative-domain/product/606711)**\n", + " - Sandals with imitation suede straps, an elastic heel strap, and wedge heels. Satin insoles and thermoplastic rubber (TPR) soles with a platform front of 2 cm and a heel of 6 cm.\n", + "\n", + "3. **[Bubble Bum Bandeau](https://representative-domain/product/642498)**\n", + " - Fully lined bandeau bikini top with padded cups and removable inserts. Detachable ties at the back of the neck, ties at the back, side support, and a silicone trim at the top.\n", + "\n", + "4. **[Gwen Jersey Top](https://representative-domain/product/671852)**\n", + " - Fitted top in stretch jersey with a slight sheen. V-neck with a lace trim at the top and adjustable spaghetti straps.\n", + "\n", + "We hope these selections inspire you to create some fabulous summer outfits. If you have any questions or need further assistance, feel free to reach out. 
Happy shopping!\n", + "\n", + "Warm regards,\n", + "\n", + "Sally \n", + "Personal Assistant \n", + "HRM Fashion, Home, and Beauty \n", + "\n" + ] + } + ], "source": [ - "print(chain.invoke({'searchPrompt':\"western boots\", 'customerName':'Alex Smith', 'timeOfYear':'Feb, 2024'}))" + "print(chain.invoke({'customerInterests':\"western boots\", 'customerName':'Alex Smith', 'timeOfYear':'July, 2024'}))" ] }, { @@ -3534,7 +4615,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 28, "metadata": { "id": "A1F0ve3cekeI" }, @@ -3553,15 +4634,21 @@ "examples = [\n", " [\n", " CUSTOMER_ID,\n", - " 'Feb, 2024',\n", + " 'June, 2024',\n", " 'Alex Smith',\n", - " 'Oversized Sweaters'\n", + " 'denim jeans'\n", + " ],\n", + " [\n", + " CUSTOMER_ID,\n", + " 'July, 2024',\n", + " 'Alex Smith',\n", + " 'western boots'\n", " ],\n", " [\n", " '819f4eab1fd76b932fd403ae9f427de8eb9c5b64411d763bb26b5c8c3c30f16f',\n", - " 'Feb, 2024',\n", + " 'June, 2024',\n", " 'Robin Fischer',\n", - " 'Oversized Sweaters'\n", + " 'denim jeans'\n", " ],\n", " [\n", " '44b0898ecce6cc1268dfdb0f91e053db014b973f67e34ed8ae28211410910693',\n", @@ -3575,70 +4662,64 @@ " 'Robin Fischer',\n", " 'denim jeans'\n", " ],\n", + " [\n", + " CUSTOMER_ID,\n", + " 'Feb, 2024',\n", + " 'Alex Smith',\n", + " 'oversized sweaters'\n", + " ],\n", "]" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 29, "metadata": { "colab": { "base_uri": "https://localhost:8080/", - "height": 1000 + "height": 626 }, "id": "XsBcFQLlekeI", - "outputId": "974df8d3-1e34-44e2-e355-3c452f3844c6" + "outputId": "e1775521-0eb2-44d8-b9fc-515c4d5b15e2" }, "outputs": [ { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ - "Running on local URL: http://127.0.0.1:7860\n", - "\n", - "Could not create share link. Missing file: /Users/zachblumenfeld/demo/genai-workshop/env/lib/python3.10/site-packages/gradio/frpc_darwin_arm64_v0.2. \n", + "Colab notebook detected. 
This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().\n", + "Running on public URL: https://9d6f49816e6dd2f28c.gradio.live\n", "\n", - "Please check your internet connection. This can happen if your antivirus software blocks the download of this file. You can install manually by following these steps: \n", - "\n", - "1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.2/frpc_darwin_arm64\n", - "2. Rename the downloaded file to: frpc_darwin_arm64_v0.2\n", - "3. Move the file to this location: /Users/zachblumenfeld/demo/genai-workshop/env/lib/python3.10/site-packages/gradio\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" + "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n" ] }, { + "output_type": "display_data", "data": { - "text/html": [ - "
" - ], "text/plain": [ "" + ], + "text/html": [ + "
" ] }, - "metadata": {}, - "output_type": "display_data" + "metadata": {} }, { - "name": "stdout", "output_type": "stream", + "name": "stdout", "text": [ "Keyboard interruption in main thread... closing server.\n", - "Killing tunnel 127.0.0.1:7860 <> None\n" + "Killing tunnel 127.0.0.1:7860 <> https://9d6f49816e6dd2f28c.gradio.live\n" ] }, { + "output_type": "execute_result", "data": { "text/plain": [] }, - "execution_count": 34, "metadata": {}, - "output_type": "execute_result" + "execution_count": 29 } ], "source": [ @@ -3646,11 +4727,11 @@ "\n", "def message_generator(*x):\n", " chain = get_chain(x[0])\n", - " return chain.invoke({'searchPrompt':x[3], 'customerName':x[2], 'timeOfYear': x[1]})\n", + " return chain.invoke({'customerInterests':x[3], 'customerName':x[2], 'timeOfYear': x[1]})\n", "\n", "customer_id = gr.Textbox(value=CUSTOMER_ID, label=\"Customer ID\")\n", - "time_of_year = gr.Textbox(value=\"Feb, 2024\", label=\"Time Of Year\")\n", - "search_prompt_txt = gr.Textbox(value='Oversized Sweaters', label=\"Customer Interests(s)\")\n", + "time_of_year = gr.Textbox(value=\"June, 2024\", label=\"Time Of Year\")\n", + "search_prompt_txt = gr.Textbox(value='denim jeans', label=\"Customer Interests(s)\")\n", "customer_name = gr.Textbox(value='Alex Smith', label=\"Customer Name\")\n", "message_result = gr.Markdown( label=\"Message\")\n", "\n", diff --git a/ws.env.template b/ws.env.template index 7351199..fdd578d 100644 --- a/ws.env.template +++ b/ws.env.template @@ -9,11 +9,4 @@ NEO4J_PASSWORD= #***************************************************************** # AI #***************************************************************** -LLM=gpt-4 - -#***************************************************************** -# OpenAI -#***************************************************************** -# Only required when using OpenAI embedding model - OPENAI_API_KEY=sk-... \ No newline at end of file