diff --git a/nbs/13_new_preproc_bge_test.ipynb b/nbs/13_new_preproc_bge_test.ipynb index 0babf9d..4c7cc6c 100644 --- a/nbs/13_new_preproc_bge_test.ipynb +++ b/nbs/13_new_preproc_bge_test.ipynb @@ -24,7 +24,74 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "166c4c58fd4141b6bfbb32eb86fb379c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 0%| …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-03-20 07:34:33 INFO: Downloading default packages for language: en (English) ...\n", + "2024-03-20 07:34:33 INFO: File exists: /home/awchen/stanza_resources/en/default.zip\n", + "2024-03-20 07:34:36 INFO: Finished downloading models and saved to /home/awchen/stanza_resources.\n", + "2024-03-20 07:34:36 INFO: Checking for updates to resources.json in case models have been updated. Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "380b40c3a490494cbc23c9d71496ab16", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 0%| …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-03-20 07:34:37 INFO: Loading these models for language: en (English):\n", + "======================================\n", + "| Processor | Package |\n", + "--------------------------------------\n", + "| tokenize | combined |\n", + "| pos | combined_charlm |\n", + "| lemma | combined_nocharlm |\n", + "| constituency | ptb3-revised_charlm |\n", + "| depparse | combined_charlm |\n", + "| sentiment | sstplus |\n", + "| ner | ontonotes_charlm |\n", + "======================================\n", + "\n", + "2024-03-20 07:34:37 INFO: Using device: cpu\n", + "2024-03-20 07:34:37 INFO: Loading: tokenize\n", + "2024-03-20 07:34:37 INFO: Loading: pos\n", + "2024-03-20 07:34:38 INFO: Loading: lemma\n", + "2024-03-20 07:34:38 INFO: Loading: constituency\n", + "2024-03-20 07:34:38 INFO: Loading: depparse\n", + "2024-03-20 07:34:38 INFO: Loading: sentiment\n", + "2024-03-20 07:34:38 INFO: Loading: ner\n", + "2024-03-20 07:34:39 INFO: Done loading processors!\n" + ] + } + ], "source": [ "# | hide\n", "# from bertopic import BERTopic\n", @@ -36,8 +103,6 @@ "# from hdbscan import HDBSCAN\n", "from itertools import tee, islice, product\n", "import joblib\n", - "# import mlflow\n", - "# from mlflow.models import infer_signature\n", "import nbdev\n", "from nbdev.showdoc import *\n", "import pandas as pd\n", @@ -52,7 +117,6 @@ "from sklearn.model_selection import train_test_split\n", "from sklearn.pipeline import make_pipeline\n", "from src.custom_sklearn_text_transformer_mlflow import CustomSKLearnAnalyzer\n", - "# from src.custom_stanza_mlflow import CustomSKLearnWrapper\n", "import src.dataframe_preprocessor as dfpp\n", "import stanza\n", "from tqdm import tqdm\n", @@ -96,7 +160,154 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c622bb1b2718469497a488bcfeca738e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 0%| …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-03-20 07:34:39 INFO: Downloading default packages for language: en (English) ...\n", + "2024-03-20 07:34:40 INFO: File exists: /home/awchen/stanza_resources/en/default.zip\n", + "2024-03-20 07:34:43 INFO: Finished downloading models and saved to /home/awchen/stanza_resources.\n", + "2024-03-20 07:34:43 INFO: Checking for updates to resources.json in case models have been updated. Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c4d6af04d98f489ea63edb2d0dbb8a88", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 0%| …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-03-20 07:34:44 INFO: Loading these models for language: en (English):\n", + "======================================\n", + "| Processor | Package |\n", + "--------------------------------------\n", + "| tokenize | combined |\n", + "| pos | combined_charlm |\n", + "| lemma | combined_nocharlm |\n", + "| constituency | ptb3-revised_charlm |\n", + "| depparse | combined_charlm |\n", + "| sentiment | sstplus |\n", + "| ner | ontonotes_charlm |\n", + "======================================\n", + "\n", + "2024-03-20 07:34:44 INFO: Using device: cuda\n", + "2024-03-20 07:34:44 INFO: Loading: tokenize\n", + "2024-03-20 07:34:47 INFO: Loading: pos\n", + "2024-03-20 07:34:47 INFO: Loading: lemma\n", + "2024-03-20 07:34:47 INFO: Loading: constituency\n", + "2024-03-20 07:34:47 INFO: Loading: depparse\n", + "2024-03-20 07:34:47 INFO: Loading: sentiment\n", + "2024-03-20 07:34:48 INFO: Loading: ner\n", + "2024-03-20 07:34:48 INFO: Done loading processors!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--------------\n", + "Raw Dataframe:\n", + " id \\\n", + "0 54a2b6b019925f464b373351 \n", + "1 54a408a019925f464b3733bc \n", + "2 54a408a26529d92b2c003631 \n", + "3 54a408a66529d92b2c003638 \n", + "4 54a408a719925f464b3733cc \n", + "\n", + " dek \\\n", + "0 How does fried chicken achieve No. 1 status? B... \n", + "1 Spinaci all'Ebraica \n", + "2 This majestic, moist, and richly spiced honey ... \n", + "3 The idea for this sandwich came to me when my ... \n", + "4 In 1930, Simon Agranat, the chief justice of t... \n", + "\n", + " hed pubDate \\\n", + "0 Pickle-Brined Fried Chicken 2014-08-19T04:00:00.000Z \n", + "1 Spinach Jewish Style 2008-09-09T04:00:00.000Z \n", + "2 New Year’s Honey Cake 2008-09-10T04:00:00.000Z \n", + "3 The B.L.A.—Bagel with Lox and Avocado 2008-09-08T04:00:00.000Z \n", + "4 Shakshuka a la Doktor Shakshuka 2008-09-09T04:00:00.000Z \n", + "\n", + " author type \\\n", + "0 [] recipe \n", + "1 [{'name': 'Edda Servi Machlin'}] recipe \n", + "2 [{'name': 'Marcy Goldman'}] recipe \n", + "3 [{'name': 'Faye Levy'}] recipe \n", + "4 [{'name': 'Joan Nathan'}] recipe \n", + "\n", + " url \\\n", + "0 /recipes/food/views/pickle-brined-fried-chicke... \n", + "1 /recipes/food/views/spinach-jewish-style-350152 \n", + "2 /recipes/food/views/majestic-and-moist-new-yea... \n", + "3 /recipes/food/views/the-b-l-a-bagel-with-lox-a... \n", + "4 /recipes/food/views/shakshuka-a-la-doktor-shak... \n", + "\n", + " photoData \\\n", + "0 {'id': '54a2b64a6529d92b2c003409', 'filename':... \n", + "1 {'id': '56746182accb4c9831e45e0a', 'filename':... \n", + "2 {'id': '55e85ba4cf90d6663f728014', 'filename':... \n", + "3 {'id': '5674617e47d1a28026045e4f', 'filename':... \n", + "4 {'id': '56746183b47c050a284a4e15', 'filename':... \n", + "\n", + " tag aggregateRating \\\n", + "0 {'category': 'ingredient', 'name': 'Chicken', ... 3.11 \n", + "1 {'category': 'cuisine', 'name': 'Italian', 'ur... 3.22 \n", + "2 {'category': 'cuisine', 'name': 'Jewish', 'url... 3.62 \n", + "3 {'category': 'cuisine', 'name': 'Jewish', 'url... 4.00 \n", + "4 {'category': 'cuisine', 'name': 'Jewish', 'url... 2.71 \n", + "\n", + " ingredients \\\n", + "0 [1 tablespoons yellow mustard seeds, 1 tablesp... \n", + "1 [3 pounds small-leaved bulk spinach, Salt, 1/2... \n", + "2 [3 1/2 cups all-purpose flour, 1 tablespoon ba... \n", + "3 [1 small ripe avocado, preferably Hass (see No... \n", + "4 [2 pounds fresh tomatoes, unpeeled and cut in ... \n", + "\n", + " prepSteps reviewsCount \\\n", + "0 [Toast mustard and coriander seeds in a dry me... 7 \n", + "1 [Remove the stems and roots from the spinach. ... 5 \n", + "2 [I like this cake best baked in a 9-inch angel... 105 \n", + "3 [A short time before serving, mash avocado and... 7 \n", + "4 [1. Place the tomatoes, garlic, salt, paprika,... 7 \n", + "\n", + " willMakeAgainPct dateCrawled \n", + "0 100 1498547035 \n", + "1 80 1498547740 \n", + "2 88 1498547738 \n", + "3 100 1498547740 \n", + "4 83 1498547740 \n", + "(34756, 15)\n" + ] + } + ], "source": [ "# instantiate stanza pipeline\n", "stanza.download('en')\n", @@ -124,7 +335,114 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--------------\n", + "Preprocessed Dataframe:\n", + " dek \\\n", + "id \n", + "54a4270b19925f464b37c1dc \n", + "54a42cde19925f464b3809d2 Green chiles pickled in soy sauce and vinegar ... \n", + "54a433036529d92b2c015de3 This soup features the flavors of India: aroma... \n", + "54a451926529d92b2c01eda8 \n", + "54a430876529d92b2c013e2b Brown sugar and molasses are balanced by fresh... \n", + "\n", + " hed \\\n", + "id \n", + "54a4270b19925f464b37c1dc Grilled Hearts of Romaine with Blue Cheese Vin... \n", + "54a42cde19925f464b3809d2 Soy-Pickled Jalapeños \n", + "54a433036529d92b2c015de3 Curried Potato and Spinach Soup with Onion Sal... \n", + "54a451926529d92b2c01eda8 Chicken Soup \n", + "54a430876529d92b2c013e2b Sweet-Hot Barbecue Sauce \n", + "\n", + " aggregateRating \\\n", + "id \n", + "54a4270b19925f464b37c1dc 3.64 \n", + "54a42cde19925f464b3809d2 3.43 \n", + "54a433036529d92b2c015de3 3.00 \n", + "54a451926529d92b2c01eda8 3.19 \n", + "54a430876529d92b2c013e2b 0.00 \n", + "\n", + " ingredients \\\n", + "id \n", + "54a4270b19925f464b37c1dc [1 1/2 cups white wine vinegar, 1/2 cup sugar,... \n", + "54a42cde19925f464b3809d2 [3 large fresh jalapeños (4 inches), sliced 1/... \n", + "54a433036529d92b2c015de3 [4 cups chopped red onions (about 2 large), 1 ... \n", + "54a451926529d92b2c01eda8 [1 pound chicken parts, 2 stalks celery, inclu... \n", + "54a430876529d92b2c013e2b [2 tablespoons olive oil, 1 cup chopped onion,... \n", + "\n", + " prepSteps \\\n", + "id \n", + "54a4270b19925f464b37c1dc [Combine first 5 ingredients and 1/4 teaspoon ... \n", + "54a42cde19925f464b3809d2 [Combine all ingredients in a small heavy sauc... \n", + "54a433036529d92b2c015de3 [Combine first 5 ingredients in heavy medium s... \n", + "54a451926529d92b2c01eda8 [1. Pour 12 cups of cold water into a large st... \n", + "54a430876529d92b2c013e2b [Heat oil in large saucepan over medium-high h... \n", + "\n", + " reviewsCount willMakeAgainPct \\\n", + "id \n", + "54a4270b19925f464b37c1dc 9 100 \n", + "54a42cde19925f464b3809d2 6 100 \n", + "54a433036529d92b2c015de3 6 67 \n", + "54a451926529d92b2c01eda8 32 87 \n", + "54a430876529d92b2c013e2b 0 0 \n", + "\n", + " ingredients_lemmafied \\\n", + "id \n", + "54a4270b19925f464b37c1dc cup white wine vinegar brk cup sugar brk cup w... \n", + "54a42cde19925f464b3809d2 large fresh jalapeño inch slice inch thick brk... \n", + "54a433036529d92b2c015de3 cup chop red onion large brk tablespoon sunflo... \n", + "54a451926529d92b2c01eda8 pound chicken part brk stalk celery include le... \n", + "54a430876529d92b2c013e2b tablespoon olive oil brk cup chop onion brk cu... \n", + "\n", + " cuisine_name \\\n", + "id \n", + "54a4270b19925f464b37c1dc Missing Cuisine \n", + "54a42cde19925f464b3809d2 Missing Cuisine \n", + "54a433036529d92b2c015de3 Indian \n", + "54a451926529d92b2c01eda8 Kosher \n", + "54a430876529d92b2c013e2b Missing Cuisine \n", + "\n", + " photo_filename \\\n", + "id \n", + "54a4270b19925f464b37c1dc EP_12162015_placeholders_casual.jpg \n", + "54a42cde19925f464b3809d2 EP_12162015_placeholders_rustic.jpg \n", + "54a433036529d92b2c015de3 234125.jpg \n", + "54a451926529d92b2c01eda8 EP_12162015_placeholders_formal.jpg \n", + "54a430876529d92b2c013e2b EP_12162015_placeholders_rustic.jpg \n", + "\n", + " photo_credit \\\n", + "id \n", + "54a4270b19925f464b37c1dc Photo by Chelsea Kyle, Prop Styling by Rhoda B... \n", + "54a42cde19925f464b3809d2 Photo by Chelsea Kyle, Prop Styling by Anna St... \n", + "54a433036529d92b2c015de3 Brian Leatart \n", + "54a451926529d92b2c01eda8 Photo by Chelsea Kyle, Prop Styling by Rhoda B... \n", + "54a430876529d92b2c013e2b Photo by Chelsea Kyle, Prop Styling by Anna St... \n", + "\n", + " author_name date_published \\\n", + "id \n", + "54a4270b19925f464b37c1dc Kate Higgins 2010-12-16 04:00:00+00:00 \n", + "54a42cde19925f464b3809d2 Lillian Chou 2009-02-19 04:00:00+00:00 \n", + "54a433036529d92b2c015de3 Peter Gordon 2006-03-07 04:00:00+00:00 \n", + "54a451926529d92b2c01eda8 Sharon Lebewohl 2004-08-20 04:00:00+00:00 \n", + "54a430876529d92b2c013e2b Suzanne Tracht 2007-12-03 20:11:11+00:00 \n", + "\n", + " recipe_url \n", + "id \n", + "54a4270b19925f464b37c1dc https://www.epicurious.com/recipes/food/views/... \n", + "54a42cde19925f464b3809d2 https://www.epicurious.com/recipes/food/views/... \n", + "54a433036529d92b2c015de3 https://www.epicurious.com/recipes/food/views/... \n", + "54a451926529d92b2c01eda8 https://www.epicurious.com/recipes/food/views/... \n", + "54a430876529d92b2c013e2b https://www.epicurious.com/recipes/food/views/... \n", + "(50, 14)\n" + ] + } + ], "source": [ "# take sample and train/test split \n", "subset_df = raw_df.sample(n=100, random_state=45)\n", @@ -143,30 +461,54 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fit_transform start: 2024-03-20 07:36:09.748134\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 50/50 [00:00<00:00, 8744.69it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fit_transform end: 2024-03-20 07:36:09.766620\n", + "Index(['English', 'English hothouse', 'English hothouse cucumber', 'available',\n", + " 'baby', 'baking', 'baking powder', 'bay', 'bay leave', 'beef',\n", + " ...\n", + " 'white', 'white vinegar', 'white wine', 'white wine vinegar', 'whole',\n", + " 'wine', 'wine vinegar', 'yukon', 'yukon gold', 'yukon gold potato'],\n", + " dtype='object', length=283)\n" + ] + } + ], "source": [ "# cv_params are parameters for the sklearn CountVectorizer or TFIDFVectorizer\n", - "sklearn_transformer_params = {\n", - " 'analyzer': 'word',\n", - " 'ngram_range': (1,4),\n", + "sklearn_transformer_params = { \n", + " 'analyzer': CustomSKLearnAnalyzer().ngram_maker(\n", + " min_ngram_length=1,\n", + " max_ngram_length=4,\n", + " ),\n", " 'min_df':3,\n", - " 'binary':False\n", + " # 'binary':False\n", "}\n", "\n", "sklearn_transformer = TfidfVectorizer(**sklearn_transformer_params)\n", "\n", - "# print('\\n')\n", - "# print('-' * 80)\n", - "# print('sklearn fit transform on ingredients:', end='\\n')\n", - "\n", - "model_input = to_nlp_df['ingredients'].apply(\" \".join).str.lower()\n", - "\n", - "print(\"sklearn fit transform start: \" + str(datetime.now()))\n", + "model_input = to_nlp_df['ingredients_lemmafied']\n", "\n", "# Do fit transform on data\n", + "print(\"fit_transform start: \" + str(datetime.now()))\n", "response = sklearn_transformer.fit_transform(tqdm(model_input)) \n", - "\n", - "print(\"sklearn fit transform end: \" + str(datetime.now()))\n", + "print(\"fit_transform end: \" + str(datetime.now()))\n", "\n", "transformed_recipe = pd.DataFrame(\n", " response.toarray(),\n", @@ -174,42 +516,2297 @@ " index=model_input.index\n", ")\n", "\n", - "print(transformed_recipe)\n", "print(transformed_recipe.columns)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EnglishEnglish hothouseEnglish hothouse cucumberavailablebabybakingbaking powderbaybay leavebeef...whitewhite vinegarwhite winewhite wine vinegarwholewinewine vinegaryukonyukon goldyukon gold potato
id
54a4270b19925f464b37c1dc0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.1499950.1499950.000000...0.1881550.0000000.2811100.2811100.0000000.2413430.2526410.0000000.0000000.000000
54a42cde19925f464b3809d20.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a433036529d92b2c015de30.0000000.0000000.0000000.0000000.0896660.0000000.0000000.0896660.0896660.000000...0.0562390.0000000.0000000.0000000.0000000.0000000.0000000.0896660.0896660.089666
54a451926529d92b2c01eda80.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.4780390.0000000.0000000.0000000.0000000.000000
54a430876529d92b2c013e2b0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0758790.1209800.0000000.0000000.0000000.0973290.0000000.0000000.0000000.000000
54a453df6529d92b2c0206870.0000000.0000000.0000000.0000000.0000000.1344960.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.1154690.0000000.0000000.0000000.0000000.000000
55b0e7116284773353bf45800.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a42bab6529d92b2c00ffa70.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a4748f19925f464b399ef20.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0971410.0000000.0000000.0000000.0000000.0000000.0000000.1548790.1548790.154879
54a4356a19925f464b3875bb0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0984600.0000000.1471020.1471020.1262920.1262920.1322040.0000000.0000000.000000
54a4697e6529d92b2c0279d30.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a45e426529d92b2c02488f0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a452c96529d92b2c01f8890.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.252416...0.1583160.2524160.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a4323619925f464b384bcc0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a4259119925f464b37af9c0.0000000.0000000.0000000.0000000.1655440.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a431da6529d92b2c014ee90.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.1841320.1841320.184132
54a426fd19925f464b37c1250.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a47bb019925f464b39b9b70.0000000.0000000.0000000.0000000.0000000.2206190.2354370.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a434d819925f464b386e620.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a428116529d92b2c00d1a70.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a436036529d92b2c01859e0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a47edf19925f464b39c58d0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a419706529d92b2c0066500.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a4349619925f464b386b120.0000000.0000000.0000000.0000000.0000000.1257390.1341840.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.1079520.0000000.0000000.0000000.0000000.000000
54a4340f6529d92b2c016be80.0000000.0000000.0000000.0000000.0000000.0000000.0000000.1880890.1880890.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a40e546529d92b2c0046060.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a428b419925f464b37d5ce0.0000000.0000000.0000000.0000000.0000000.1202450.1283210.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.1032350.0000000.0000000.0000000.0000000.000000
54a453a519925f464b38fd160.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a41cb219925f464b376d820.1293530.1293530.1293530.1293530.0000000.0000000.0000000.0000000.0000000.000000...0.0811310.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a431896529d92b2c014b270.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.147772...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a423ab19925f464b3799f20.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.1323490.0000000.0000000.0000000.0000000.000000
54a47c1419925f464b39bb280.0000000.0000000.0000000.1468740.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
593ee3ba12c27b182380821f0.1083410.1083410.1083410.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0679520.0000000.1015220.1015220.0000000.0871600.0912400.0000000.0000000.000000
54a456366529d92b2c02235a0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.120817...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a452d419925f464b38f1b50.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.1119270.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a4659b6529d92b2c026a530.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a46d5d19925f464b3982d30.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.1110530.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a4582119925f464b3927a10.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a4205319925f464b377c9f0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a470cc19925f464b39906b0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.1689370.1768450.0000000.0000000.000000
54a44f4a6529d92b2c01de450.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.2592130.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
592ef494ae10ad089795ebfa0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0985600.0000000.1472520.1472520.0000000.1264210.1323390.0000000.0000000.000000
54a41f016529d92b2c00757d0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.1242630.1981220.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a436266529d92b2c01876e0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a428bd19925f464b37d63e0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a45a4f6529d92b2c0234d10.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a41ed76529d92b2c0074400.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a42c906529d92b2c010b740.0000000.0000000.0000000.1851290.0000000.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.1489370.0000000.0000000.0000000.0000000.000000
569519a6dc18ea6c22c9b9ab0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000...0.4072260.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000000
54a438d56529d92b2c0196480.1511650.1511650.1511650.0000000.1511650.0000000.0000000.0000000.0000000.000000...0.0000000.0000000.0000000.0000000.0000000.1216130.1273060.0000000.0000000.000000
\n", + "

50 rows × 283 columns

\n", + "
" + ], + "text/plain": [ + " English English hothouse \\\n", + "id \n", + "54a4270b19925f464b37c1dc 0.000000 0.000000 \n", + "54a42cde19925f464b3809d2 0.000000 0.000000 \n", + "54a433036529d92b2c015de3 0.000000 0.000000 \n", + "54a451926529d92b2c01eda8 0.000000 0.000000 \n", + "54a430876529d92b2c013e2b 0.000000 0.000000 \n", + "54a453df6529d92b2c020687 0.000000 0.000000 \n", + "55b0e7116284773353bf4580 0.000000 0.000000 \n", + "54a42bab6529d92b2c00ffa7 0.000000 0.000000 \n", + "54a4748f19925f464b399ef2 0.000000 0.000000 \n", + "54a4356a19925f464b3875bb 0.000000 0.000000 \n", + "54a4697e6529d92b2c0279d3 0.000000 0.000000 \n", + "54a45e426529d92b2c02488f 0.000000 0.000000 \n", + "54a452c96529d92b2c01f889 0.000000 0.000000 \n", + "54a4323619925f464b384bcc 0.000000 0.000000 \n", + "54a4259119925f464b37af9c 0.000000 0.000000 \n", + "54a431da6529d92b2c014ee9 0.000000 0.000000 \n", + "54a426fd19925f464b37c125 0.000000 0.000000 \n", + "54a47bb019925f464b39b9b7 0.000000 0.000000 \n", + "54a434d819925f464b386e62 0.000000 0.000000 \n", + "54a428116529d92b2c00d1a7 0.000000 0.000000 \n", + "54a436036529d92b2c01859e 0.000000 0.000000 \n", + "54a47edf19925f464b39c58d 0.000000 0.000000 \n", + "54a419706529d92b2c006650 0.000000 0.000000 \n", + "54a4349619925f464b386b12 0.000000 0.000000 \n", + "54a4340f6529d92b2c016be8 0.000000 0.000000 \n", + "54a40e546529d92b2c004606 0.000000 0.000000 \n", + "54a428b419925f464b37d5ce 0.000000 0.000000 \n", + "54a453a519925f464b38fd16 0.000000 0.000000 \n", + "54a41cb219925f464b376d82 0.129353 0.129353 \n", + "54a431896529d92b2c014b27 0.000000 0.000000 \n", + "54a423ab19925f464b3799f2 0.000000 0.000000 \n", + "54a47c1419925f464b39bb28 0.000000 0.000000 \n", + "593ee3ba12c27b182380821f 0.108341 0.108341 \n", + "54a456366529d92b2c02235a 0.000000 0.000000 \n", + "54a452d419925f464b38f1b5 0.000000 0.000000 \n", + "54a4659b6529d92b2c026a53 0.000000 0.000000 \n", + "54a46d5d19925f464b3982d3 0.000000 0.000000 \n", + "54a4582119925f464b3927a1 0.000000 0.000000 \n", + "54a4205319925f464b377c9f 0.000000 0.000000 \n", + "54a470cc19925f464b39906b 0.000000 0.000000 \n", + "54a44f4a6529d92b2c01de45 0.000000 0.000000 \n", + "592ef494ae10ad089795ebfa 0.000000 0.000000 \n", + "54a41f016529d92b2c00757d 0.000000 0.000000 \n", + "54a436266529d92b2c01876e 0.000000 0.000000 \n", + "54a428bd19925f464b37d63e 0.000000 0.000000 \n", + "54a45a4f6529d92b2c0234d1 0.000000 0.000000 \n", + "54a41ed76529d92b2c007440 0.000000 0.000000 \n", + "54a42c906529d92b2c010b74 0.000000 0.000000 \n", + "569519a6dc18ea6c22c9b9ab 0.000000 0.000000 \n", + "54a438d56529d92b2c019648 0.151165 0.151165 \n", + "\n", + " English hothouse cucumber available baby \\\n", + "id \n", + "54a4270b19925f464b37c1dc 0.000000 0.000000 0.000000 \n", + "54a42cde19925f464b3809d2 0.000000 0.000000 0.000000 \n", + "54a433036529d92b2c015de3 0.000000 0.000000 0.089666 \n", + "54a451926529d92b2c01eda8 0.000000 0.000000 0.000000 \n", + "54a430876529d92b2c013e2b 0.000000 0.000000 0.000000 \n", + "54a453df6529d92b2c020687 0.000000 0.000000 0.000000 \n", + "55b0e7116284773353bf4580 0.000000 0.000000 0.000000 \n", + "54a42bab6529d92b2c00ffa7 0.000000 0.000000 0.000000 \n", + "54a4748f19925f464b399ef2 0.000000 0.000000 0.000000 \n", + "54a4356a19925f464b3875bb 0.000000 0.000000 0.000000 \n", + "54a4697e6529d92b2c0279d3 0.000000 0.000000 0.000000 \n", + "54a45e426529d92b2c02488f 0.000000 0.000000 0.000000 \n", + "54a452c96529d92b2c01f889 0.000000 0.000000 0.000000 \n", + "54a4323619925f464b384bcc 0.000000 0.000000 0.000000 \n", + "54a4259119925f464b37af9c 0.000000 0.000000 0.165544 \n", + "54a431da6529d92b2c014ee9 0.000000 0.000000 0.000000 \n", + "54a426fd19925f464b37c125 0.000000 0.000000 0.000000 \n", + "54a47bb019925f464b39b9b7 0.000000 0.000000 0.000000 \n", + "54a434d819925f464b386e62 0.000000 0.000000 0.000000 \n", + "54a428116529d92b2c00d1a7 0.000000 0.000000 0.000000 \n", + "54a436036529d92b2c01859e 0.000000 0.000000 0.000000 \n", + "54a47edf19925f464b39c58d 0.000000 0.000000 0.000000 \n", + "54a419706529d92b2c006650 0.000000 0.000000 0.000000 \n", + "54a4349619925f464b386b12 0.000000 0.000000 0.000000 \n", + "54a4340f6529d92b2c016be8 0.000000 0.000000 0.000000 \n", + "54a40e546529d92b2c004606 0.000000 0.000000 0.000000 \n", + "54a428b419925f464b37d5ce 0.000000 0.000000 0.000000 \n", + "54a453a519925f464b38fd16 0.000000 0.000000 0.000000 \n", + "54a41cb219925f464b376d82 0.129353 0.129353 0.000000 \n", + "54a431896529d92b2c014b27 0.000000 0.000000 0.000000 \n", + "54a423ab19925f464b3799f2 0.000000 0.000000 0.000000 \n", + "54a47c1419925f464b39bb28 0.000000 0.146874 0.000000 \n", + "593ee3ba12c27b182380821f 0.108341 0.000000 0.000000 \n", + "54a456366529d92b2c02235a 0.000000 0.000000 0.000000 \n", + "54a452d419925f464b38f1b5 0.000000 0.000000 0.000000 \n", + "54a4659b6529d92b2c026a53 0.000000 0.000000 0.000000 \n", + "54a46d5d19925f464b3982d3 0.000000 0.000000 0.000000 \n", + "54a4582119925f464b3927a1 0.000000 0.000000 0.000000 \n", + "54a4205319925f464b377c9f 0.000000 0.000000 0.000000 \n", + "54a470cc19925f464b39906b 0.000000 0.000000 0.000000 \n", + "54a44f4a6529d92b2c01de45 0.000000 0.000000 0.000000 \n", + "592ef494ae10ad089795ebfa 0.000000 0.000000 0.000000 \n", + "54a41f016529d92b2c00757d 0.000000 0.000000 0.000000 \n", + "54a436266529d92b2c01876e 0.000000 0.000000 0.000000 \n", + "54a428bd19925f464b37d63e 0.000000 0.000000 0.000000 \n", + "54a45a4f6529d92b2c0234d1 0.000000 0.000000 0.000000 \n", + "54a41ed76529d92b2c007440 0.000000 0.000000 0.000000 \n", + "54a42c906529d92b2c010b74 0.000000 0.185129 0.000000 \n", + "569519a6dc18ea6c22c9b9ab 0.000000 0.000000 0.000000 \n", + "54a438d56529d92b2c019648 0.151165 0.000000 0.151165 \n", + "\n", + " baking baking powder bay bay leave \\\n", + "id \n", + "54a4270b19925f464b37c1dc 0.000000 0.000000 0.149995 0.149995 \n", + "54a42cde19925f464b3809d2 0.000000 0.000000 0.000000 0.000000 \n", + "54a433036529d92b2c015de3 0.000000 0.000000 0.089666 0.089666 \n", + "54a451926529d92b2c01eda8 0.000000 0.000000 0.000000 0.000000 \n", + "54a430876529d92b2c013e2b 0.000000 0.000000 0.000000 0.000000 \n", + "54a453df6529d92b2c020687 0.134496 0.000000 0.000000 0.000000 \n", + "55b0e7116284773353bf4580 0.000000 0.000000 0.000000 0.000000 \n", + "54a42bab6529d92b2c00ffa7 0.000000 0.000000 0.000000 0.000000 \n", + "54a4748f19925f464b399ef2 0.000000 0.000000 0.000000 0.000000 \n", + "54a4356a19925f464b3875bb 0.000000 0.000000 0.000000 0.000000 \n", + "54a4697e6529d92b2c0279d3 0.000000 0.000000 0.000000 0.000000 \n", + "54a45e426529d92b2c02488f 0.000000 0.000000 0.000000 0.000000 \n", + "54a452c96529d92b2c01f889 0.000000 0.000000 0.000000 0.000000 \n", + "54a4323619925f464b384bcc 0.000000 0.000000 0.000000 0.000000 \n", + "54a4259119925f464b37af9c 0.000000 0.000000 0.000000 0.000000 \n", + "54a431da6529d92b2c014ee9 0.000000 0.000000 0.000000 0.000000 \n", + "54a426fd19925f464b37c125 0.000000 0.000000 0.000000 0.000000 \n", + "54a47bb019925f464b39b9b7 0.220619 0.235437 0.000000 0.000000 \n", + "54a434d819925f464b386e62 0.000000 0.000000 0.000000 0.000000 \n", + "54a428116529d92b2c00d1a7 0.000000 0.000000 0.000000 0.000000 \n", + "54a436036529d92b2c01859e 0.000000 0.000000 0.000000 0.000000 \n", + "54a47edf19925f464b39c58d 0.000000 0.000000 0.000000 0.000000 \n", + "54a419706529d92b2c006650 0.000000 0.000000 0.000000 0.000000 \n", + "54a4349619925f464b386b12 0.125739 0.134184 0.000000 0.000000 \n", + "54a4340f6529d92b2c016be8 0.000000 0.000000 0.188089 0.188089 \n", + "54a40e546529d92b2c004606 0.000000 0.000000 0.000000 0.000000 \n", + "54a428b419925f464b37d5ce 0.120245 0.128321 0.000000 0.000000 \n", + "54a453a519925f464b38fd16 0.000000 0.000000 0.000000 0.000000 \n", + "54a41cb219925f464b376d82 0.000000 0.000000 0.000000 0.000000 \n", + "54a431896529d92b2c014b27 0.000000 0.000000 0.000000 0.000000 \n", + "54a423ab19925f464b3799f2 0.000000 0.000000 0.000000 0.000000 \n", + "54a47c1419925f464b39bb28 0.000000 0.000000 0.000000 0.000000 \n", + "593ee3ba12c27b182380821f 0.000000 0.000000 0.000000 0.000000 \n", + "54a456366529d92b2c02235a 0.000000 0.000000 0.000000 0.000000 \n", + "54a452d419925f464b38f1b5 0.000000 0.000000 0.000000 0.000000 \n", + "54a4659b6529d92b2c026a53 0.000000 0.000000 0.000000 0.000000 \n", + "54a46d5d19925f464b3982d3 0.000000 0.000000 0.000000 0.000000 \n", + "54a4582119925f464b3927a1 0.000000 0.000000 0.000000 0.000000 \n", + "54a4205319925f464b377c9f 0.000000 0.000000 0.000000 0.000000 \n", + "54a470cc19925f464b39906b 0.000000 0.000000 0.000000 0.000000 \n", + "54a44f4a6529d92b2c01de45 0.000000 0.000000 0.000000 0.000000 \n", + "592ef494ae10ad089795ebfa 0.000000 0.000000 0.000000 0.000000 \n", + "54a41f016529d92b2c00757d 0.000000 0.000000 0.000000 0.000000 \n", + "54a436266529d92b2c01876e 0.000000 0.000000 0.000000 0.000000 \n", + "54a428bd19925f464b37d63e 0.000000 0.000000 0.000000 0.000000 \n", + "54a45a4f6529d92b2c0234d1 0.000000 0.000000 0.000000 0.000000 \n", + "54a41ed76529d92b2c007440 0.000000 0.000000 0.000000 0.000000 \n", + "54a42c906529d92b2c010b74 0.000000 0.000000 0.000000 0.000000 \n", + "569519a6dc18ea6c22c9b9ab 0.000000 0.000000 0.000000 0.000000 \n", + "54a438d56529d92b2c019648 0.000000 0.000000 0.000000 0.000000 \n", + "\n", + " beef ... white white vinegar white wine \\\n", + "id ... \n", + "54a4270b19925f464b37c1dc 0.000000 ... 0.188155 0.000000 0.281110 \n", + "54a42cde19925f464b3809d2 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a433036529d92b2c015de3 0.000000 ... 0.056239 0.000000 0.000000 \n", + "54a451926529d92b2c01eda8 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a430876529d92b2c013e2b 0.000000 ... 0.075879 0.120980 0.000000 \n", + "54a453df6529d92b2c020687 0.000000 ... 0.000000 0.000000 0.000000 \n", + "55b0e7116284773353bf4580 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a42bab6529d92b2c00ffa7 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a4748f19925f464b399ef2 0.000000 ... 0.097141 0.000000 0.000000 \n", + "54a4356a19925f464b3875bb 0.000000 ... 0.098460 0.000000 0.147102 \n", + "54a4697e6529d92b2c0279d3 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a45e426529d92b2c02488f 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a452c96529d92b2c01f889 0.252416 ... 0.158316 0.252416 0.000000 \n", + "54a4323619925f464b384bcc 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a4259119925f464b37af9c 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a431da6529d92b2c014ee9 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a426fd19925f464b37c125 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a47bb019925f464b39b9b7 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a434d819925f464b386e62 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a428116529d92b2c00d1a7 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a436036529d92b2c01859e 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a47edf19925f464b39c58d 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a419706529d92b2c006650 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a4349619925f464b386b12 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a4340f6529d92b2c016be8 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a40e546529d92b2c004606 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a428b419925f464b37d5ce 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a453a519925f464b38fd16 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a41cb219925f464b376d82 0.000000 ... 0.081131 0.000000 0.000000 \n", + "54a431896529d92b2c014b27 0.147772 ... 0.000000 0.000000 0.000000 \n", + "54a423ab19925f464b3799f2 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a47c1419925f464b39bb28 0.000000 ... 0.000000 0.000000 0.000000 \n", + "593ee3ba12c27b182380821f 0.000000 ... 0.067952 0.000000 0.101522 \n", + "54a456366529d92b2c02235a 0.120817 ... 0.000000 0.000000 0.000000 \n", + "54a452d419925f464b38f1b5 0.000000 ... 0.111927 0.000000 0.000000 \n", + "54a4659b6529d92b2c026a53 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a46d5d19925f464b3982d3 0.000000 ... 0.111053 0.000000 0.000000 \n", + "54a4582119925f464b3927a1 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a4205319925f464b377c9f 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a470cc19925f464b39906b 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a44f4a6529d92b2c01de45 0.000000 ... 0.259213 0.000000 0.000000 \n", + "592ef494ae10ad089795ebfa 0.000000 ... 0.098560 0.000000 0.147252 \n", + "54a41f016529d92b2c00757d 0.000000 ... 0.124263 0.198122 0.000000 \n", + "54a436266529d92b2c01876e 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a428bd19925f464b37d63e 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a45a4f6529d92b2c0234d1 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a41ed76529d92b2c007440 0.000000 ... 0.000000 0.000000 0.000000 \n", + "54a42c906529d92b2c010b74 0.000000 ... 0.000000 0.000000 0.000000 \n", + "569519a6dc18ea6c22c9b9ab 0.000000 ... 0.407226 0.000000 0.000000 \n", + "54a438d56529d92b2c019648 0.000000 ... 0.000000 0.000000 0.000000 \n", + "\n", + " white wine vinegar whole wine \\\n", + "id \n", + "54a4270b19925f464b37c1dc 0.281110 0.000000 0.241343 \n", + "54a42cde19925f464b3809d2 0.000000 0.000000 0.000000 \n", + "54a433036529d92b2c015de3 0.000000 0.000000 0.000000 \n", + "54a451926529d92b2c01eda8 0.000000 0.478039 0.000000 \n", + "54a430876529d92b2c013e2b 0.000000 0.000000 0.097329 \n", + "54a453df6529d92b2c020687 0.000000 0.115469 0.000000 \n", + "55b0e7116284773353bf4580 0.000000 0.000000 0.000000 \n", + "54a42bab6529d92b2c00ffa7 0.000000 0.000000 0.000000 \n", + "54a4748f19925f464b399ef2 0.000000 0.000000 0.000000 \n", + "54a4356a19925f464b3875bb 0.147102 0.126292 0.126292 \n", + "54a4697e6529d92b2c0279d3 0.000000 0.000000 0.000000 \n", + "54a45e426529d92b2c02488f 0.000000 0.000000 0.000000 \n", + "54a452c96529d92b2c01f889 0.000000 0.000000 0.000000 \n", + "54a4323619925f464b384bcc 0.000000 0.000000 0.000000 \n", + "54a4259119925f464b37af9c 0.000000 0.000000 0.000000 \n", + "54a431da6529d92b2c014ee9 0.000000 0.000000 0.000000 \n", + "54a426fd19925f464b37c125 0.000000 0.000000 0.000000 \n", + "54a47bb019925f464b39b9b7 0.000000 0.000000 0.000000 \n", + "54a434d819925f464b386e62 0.000000 0.000000 0.000000 \n", + "54a428116529d92b2c00d1a7 0.000000 0.000000 0.000000 \n", + "54a436036529d92b2c01859e 0.000000 0.000000 0.000000 \n", + "54a47edf19925f464b39c58d 0.000000 0.000000 0.000000 \n", + "54a419706529d92b2c006650 0.000000 0.000000 0.000000 \n", + "54a4349619925f464b386b12 0.000000 0.107952 0.000000 \n", + "54a4340f6529d92b2c016be8 0.000000 0.000000 0.000000 \n", + "54a40e546529d92b2c004606 0.000000 0.000000 0.000000 \n", + "54a428b419925f464b37d5ce 0.000000 0.103235 0.000000 \n", + "54a453a519925f464b38fd16 0.000000 0.000000 0.000000 \n", + "54a41cb219925f464b376d82 0.000000 0.000000 0.000000 \n", + "54a431896529d92b2c014b27 0.000000 0.000000 0.000000 \n", + "54a423ab19925f464b3799f2 0.000000 0.132349 0.000000 \n", + "54a47c1419925f464b39bb28 0.000000 0.000000 0.000000 \n", + "593ee3ba12c27b182380821f 0.101522 0.000000 0.087160 \n", + "54a456366529d92b2c02235a 0.000000 0.000000 0.000000 \n", + "54a452d419925f464b38f1b5 0.000000 0.000000 0.000000 \n", + "54a4659b6529d92b2c026a53 0.000000 0.000000 0.000000 \n", + "54a46d5d19925f464b3982d3 0.000000 0.000000 0.000000 \n", + "54a4582119925f464b3927a1 0.000000 0.000000 0.000000 \n", + "54a4205319925f464b377c9f 0.000000 0.000000 0.000000 \n", + "54a470cc19925f464b39906b 0.000000 0.000000 0.168937 \n", + "54a44f4a6529d92b2c01de45 0.000000 0.000000 0.000000 \n", + "592ef494ae10ad089795ebfa 0.147252 0.000000 0.126421 \n", + "54a41f016529d92b2c00757d 0.000000 0.000000 0.000000 \n", + "54a436266529d92b2c01876e 0.000000 0.000000 0.000000 \n", + "54a428bd19925f464b37d63e 0.000000 0.000000 0.000000 \n", + "54a45a4f6529d92b2c0234d1 0.000000 0.000000 0.000000 \n", + "54a41ed76529d92b2c007440 0.000000 0.000000 0.000000 \n", + "54a42c906529d92b2c010b74 0.000000 0.148937 0.000000 \n", + "569519a6dc18ea6c22c9b9ab 0.000000 0.000000 0.000000 \n", + "54a438d56529d92b2c019648 0.000000 0.000000 0.121613 \n", + "\n", + " wine vinegar yukon yukon gold \\\n", + "id \n", + "54a4270b19925f464b37c1dc 0.252641 0.000000 0.000000 \n", + "54a42cde19925f464b3809d2 0.000000 0.000000 0.000000 \n", + "54a433036529d92b2c015de3 0.000000 0.089666 0.089666 \n", + "54a451926529d92b2c01eda8 0.000000 0.000000 0.000000 \n", + "54a430876529d92b2c013e2b 0.000000 0.000000 0.000000 \n", + "54a453df6529d92b2c020687 0.000000 0.000000 0.000000 \n", + "55b0e7116284773353bf4580 0.000000 0.000000 0.000000 \n", + "54a42bab6529d92b2c00ffa7 0.000000 0.000000 0.000000 \n", + "54a4748f19925f464b399ef2 0.000000 0.154879 0.154879 \n", + "54a4356a19925f464b3875bb 0.132204 0.000000 0.000000 \n", + "54a4697e6529d92b2c0279d3 0.000000 0.000000 0.000000 \n", + "54a45e426529d92b2c02488f 0.000000 0.000000 0.000000 \n", + "54a452c96529d92b2c01f889 0.000000 0.000000 0.000000 \n", + "54a4323619925f464b384bcc 0.000000 0.000000 0.000000 \n", + "54a4259119925f464b37af9c 0.000000 0.000000 0.000000 \n", + "54a431da6529d92b2c014ee9 0.000000 0.184132 0.184132 \n", + "54a426fd19925f464b37c125 0.000000 0.000000 0.000000 \n", + "54a47bb019925f464b39b9b7 0.000000 0.000000 0.000000 \n", + "54a434d819925f464b386e62 0.000000 0.000000 0.000000 \n", + "54a428116529d92b2c00d1a7 0.000000 0.000000 0.000000 \n", + "54a436036529d92b2c01859e 0.000000 0.000000 0.000000 \n", + "54a47edf19925f464b39c58d 0.000000 0.000000 0.000000 \n", + "54a419706529d92b2c006650 0.000000 0.000000 0.000000 \n", + "54a4349619925f464b386b12 0.000000 0.000000 0.000000 \n", + "54a4340f6529d92b2c016be8 0.000000 0.000000 0.000000 \n", + "54a40e546529d92b2c004606 0.000000 0.000000 0.000000 \n", + "54a428b419925f464b37d5ce 0.000000 0.000000 0.000000 \n", + "54a453a519925f464b38fd16 0.000000 0.000000 0.000000 \n", + "54a41cb219925f464b376d82 0.000000 0.000000 0.000000 \n", + "54a431896529d92b2c014b27 0.000000 0.000000 0.000000 \n", + "54a423ab19925f464b3799f2 0.000000 0.000000 0.000000 \n", + "54a47c1419925f464b39bb28 0.000000 0.000000 0.000000 \n", + "593ee3ba12c27b182380821f 0.091240 0.000000 0.000000 \n", + "54a456366529d92b2c02235a 0.000000 0.000000 0.000000 \n", + "54a452d419925f464b38f1b5 0.000000 0.000000 0.000000 \n", + "54a4659b6529d92b2c026a53 0.000000 0.000000 0.000000 \n", + "54a46d5d19925f464b3982d3 0.000000 0.000000 0.000000 \n", + "54a4582119925f464b3927a1 0.000000 0.000000 0.000000 \n", + "54a4205319925f464b377c9f 0.000000 0.000000 0.000000 \n", + "54a470cc19925f464b39906b 0.176845 0.000000 0.000000 \n", + "54a44f4a6529d92b2c01de45 0.000000 0.000000 0.000000 \n", + "592ef494ae10ad089795ebfa 0.132339 0.000000 0.000000 \n", + "54a41f016529d92b2c00757d 0.000000 0.000000 0.000000 \n", + "54a436266529d92b2c01876e 0.000000 0.000000 0.000000 \n", + "54a428bd19925f464b37d63e 0.000000 0.000000 0.000000 \n", + "54a45a4f6529d92b2c0234d1 0.000000 0.000000 0.000000 \n", + "54a41ed76529d92b2c007440 0.000000 0.000000 0.000000 \n", + "54a42c906529d92b2c010b74 0.000000 0.000000 0.000000 \n", + "569519a6dc18ea6c22c9b9ab 0.000000 0.000000 0.000000 \n", + "54a438d56529d92b2c019648 0.127306 0.000000 0.000000 \n", + "\n", + " yukon gold potato \n", + "id \n", + "54a4270b19925f464b37c1dc 0.000000 \n", + "54a42cde19925f464b3809d2 0.000000 \n", + "54a433036529d92b2c015de3 0.089666 \n", + "54a451926529d92b2c01eda8 0.000000 \n", + "54a430876529d92b2c013e2b 0.000000 \n", + "54a453df6529d92b2c020687 0.000000 \n", + "55b0e7116284773353bf4580 0.000000 \n", + "54a42bab6529d92b2c00ffa7 0.000000 \n", + "54a4748f19925f464b399ef2 0.154879 \n", + "54a4356a19925f464b3875bb 0.000000 \n", + "54a4697e6529d92b2c0279d3 0.000000 \n", + "54a45e426529d92b2c02488f 0.000000 \n", + "54a452c96529d92b2c01f889 0.000000 \n", + "54a4323619925f464b384bcc 0.000000 \n", + "54a4259119925f464b37af9c 0.000000 \n", + "54a431da6529d92b2c014ee9 0.184132 \n", + "54a426fd19925f464b37c125 0.000000 \n", + "54a47bb019925f464b39b9b7 0.000000 \n", + "54a434d819925f464b386e62 0.000000 \n", + "54a428116529d92b2c00d1a7 0.000000 \n", + "54a436036529d92b2c01859e 0.000000 \n", + "54a47edf19925f464b39c58d 0.000000 \n", + "54a419706529d92b2c006650 0.000000 \n", + "54a4349619925f464b386b12 0.000000 \n", + "54a4340f6529d92b2c016be8 0.000000 \n", + "54a40e546529d92b2c004606 0.000000 \n", + "54a428b419925f464b37d5ce 0.000000 \n", + "54a453a519925f464b38fd16 0.000000 \n", + "54a41cb219925f464b376d82 0.000000 \n", + "54a431896529d92b2c014b27 0.000000 \n", + "54a423ab19925f464b3799f2 0.000000 \n", + "54a47c1419925f464b39bb28 0.000000 \n", + "593ee3ba12c27b182380821f 0.000000 \n", + "54a456366529d92b2c02235a 0.000000 \n", + "54a452d419925f464b38f1b5 0.000000 \n", + "54a4659b6529d92b2c026a53 0.000000 \n", + "54a46d5d19925f464b3982d3 0.000000 \n", + "54a4582119925f464b3927a1 0.000000 \n", + "54a4205319925f464b377c9f 0.000000 \n", + "54a470cc19925f464b39906b 0.000000 \n", + "54a44f4a6529d92b2c01de45 0.000000 \n", + "592ef494ae10ad089795ebfa 0.000000 \n", + "54a41f016529d92b2c00757d 0.000000 \n", + "54a436266529d92b2c01876e 0.000000 \n", + "54a428bd19925f464b37d63e 0.000000 \n", + "54a45a4f6529d92b2c0234d1 0.000000 \n", + "54a41ed76529d92b2c007440 0.000000 \n", + "54a42c906529d92b2c010b74 0.000000 \n", + "569519a6dc18ea6c22c9b9ab 0.000000 \n", + "54a438d56529d92b2c019648 0.000000 \n", + "\n", + "[50 rows x 283 columns]" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transformed_recipe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['English',\n", + " 'English hothouse',\n", + " 'English hothouse cucumber',\n", + " 'available',\n", + " 'baby',\n", + " 'baking',\n", + " 'baking powder',\n", + " 'bay',\n", + " 'bay leave',\n", + " 'beef',\n", + " 'bell',\n", + " 'bell pepper',\n", + " 'black',\n", + " 'black pepper',\n", + " 'bread',\n", + " 'brk',\n", + " 'broth',\n", + " 'brown',\n", + " 'brown sugar',\n", + " 'bunch',\n", + " 'butter',\n", + " 'can',\n", + " 'carrot',\n", + " 'cayenne',\n", + " 'cayenne pepper',\n", + " 'celery',\n", + " 'cheese',\n", + " 'cherry',\n", + " 'chicken',\n", + " 'chile',\n", + " 'chop',\n", + " 'chop fresh',\n", + " 'chop fresh cilantro',\n", + " 'chop onion',\n", + " 'chop red',\n", + " 'cilantro',\n", + " 'cinnamon',\n", + " 'clove',\n", + " 'clove mince',\n", + " 'coarse',\n", + " 'coarse kosher',\n", + " 'coarse kosher salt',\n", + " 'core',\n", + " 'core cut',\n", + " 'core cut inch',\n", + " 'coriander',\n", + " 'cream',\n", + " 'crosswise',\n", + " 'cube',\n", + " 'cucumber',\n", + " 'cumin',\n", + " 'cup',\n", + " 'cup chop',\n", + " 'cup chop onion',\n", + " 'cup dry',\n", + " 'cup fresh',\n", + " 'cup grate',\n", + " 'cup olive',\n", + " 'cup olive oil',\n", + " 'cup pack',\n", + " 'cup purpose',\n", + " 'cup purpose flour',\n", + " 'cup slice',\n", + " 'cup sour',\n", + " 'cup sour cream',\n", + " 'cup stick',\n", + " 'cup sugar',\n", + " 'cup tablespoon',\n", + " 'cup water',\n", + " 'curry',\n", + " 'curry powder',\n", + " 'cut',\n", + " 'cut inch',\n", + " 'cut inch cube',\n", + " 'cut inch thick',\n", + " 'dark',\n", + " 'dark brown',\n", + " 'dark brown sugar',\n", + " 'dijon',\n", + " 'dijon mustard',\n", + " 'distil',\n", + " 'distil white',\n", + " 'distil white vinegar',\n", + " 'divide',\n", + " 'drain',\n", + " 'dry',\n", + " 'egg',\n", + " 'equipment',\n", + " 'extra',\n", + " 'extra virgin',\n", + " 'extra virgin olive',\n", + " 'extra virgin olive oil',\n", + " 'extract',\n", + " 'fillet',\n", + " 'firm',\n", + " 'flour',\n", + " 'food',\n", + " 'food store',\n", + " 'fresh',\n", + " 'fresh cilantro',\n", + " 'fresh lemon',\n", + " 'fresh lemon juice',\n", + " 'fresh lime',\n", + " 'fresh lime juice',\n", + " 'fresh mint',\n", + " 'garlic',\n", + " 'garlic clove',\n", + " 'garlic clove mince',\n", + " 'garlic powder',\n", + " 'garnish',\n", + " 'ginger',\n", + " 'gold',\n", + " 'gold potato',\n", + " 'golden',\n", + " 'golden brown',\n", + " 'grain',\n", + " 'grate',\n", + " 'grate lemon',\n", + " 'green',\n", + " 'green onion',\n", + " 'ground',\n", + " 'ground black',\n", + " 'ground black pepper',\n", + " 'ground cinnamon',\n", + " 'ground cumin',\n", + " 'halve',\n", + " 'halve pit',\n", + " 'hot',\n", + " 'hothouse',\n", + " 'hothouse cucumber',\n", + " 'inch',\n", + " 'inch cube',\n", + " 'inch long',\n", + " 'inch piece',\n", + " 'inch thick',\n", + " 'jalape',\n", + " 'juice',\n", + " 'kosher',\n", + " 'kosher salt',\n", + " 'large',\n", + " 'large egg',\n", + " 'large garlic',\n", + " 'large garlic clove',\n", + " 'leave',\n", + " 'lemon',\n", + " 'lemon juice',\n", + " 'light',\n", + " 'lime',\n", + " 'lime juice',\n", + " 'liqueur',\n", + " 'long',\n", + " 'low',\n", + " 'low salt',\n", + " 'medium',\n", + " 'milk',\n", + " 'mince',\n", + " 'mint',\n", + " 'mustard',\n", + " 'oil',\n", + " 'olive',\n", + " 'olive oil',\n", + " 'onion',\n", + " 'onion chop',\n", + " 'onion slice',\n", + " 'orange',\n", + " 'other',\n", + " 'ounce',\n", + " 'ounce can',\n", + " 'pack',\n", + " 'pack dark',\n", + " 'pack dark brown',\n", + " 'pack dark brown sugar',\n", + " 'parsley',\n", + " 'paste',\n", + " 'peel',\n", + " 'pepper',\n", + " 'piece',\n", + " 'pinch',\n", + " 'pit',\n", + " 'potato',\n", + " 'pound',\n", + " 'powder',\n", + " 'purpose',\n", + " 'purpose flour',\n", + " 'red',\n", + " 'red bell',\n", + " 'red bell pepper',\n", + " 'red onion',\n", + " 'red wine',\n", + " 'rice',\n", + " 'roast',\n", + " 'romaine',\n", + " 'room',\n", + " 'room temperature',\n", + " 'rosemary',\n", + " 'salt',\n", + " 'salt ground',\n", + " 'sauce',\n", + " 'scallion',\n", + " 'seed',\n", + " 'sesame',\n", + " 'shallot',\n", + " 'slice',\n", + " 'small',\n", + " 'sour',\n", + " 'sour cream',\n", + " 'soy',\n", + " 'soy sauce',\n", + " 'special',\n", + " 'special equipment',\n", + " 'specialty',\n", + " 'specialty food',\n", + " 'sprig',\n", + " 'stem',\n", + " 'stick',\n", + " 'store',\n", + " 'strip',\n", + " 'style',\n", + " 'such',\n", + " 'sugar',\n", + " 'tablespoon',\n", + " 'tablespoon chop',\n", + " 'tablespoon chop fresh',\n", + " 'tablespoon chop fresh cilantro',\n", + " 'tablespoon extra',\n", + " 'tablespoon extra virgin',\n", + " 'tablespoon extra virgin olive',\n", + " 'tablespoon fresh',\n", + " 'tablespoon fresh lemon',\n", + " 'tablespoon fresh lemon juice',\n", + " 'tablespoon light',\n", + " 'tablespoon olive',\n", + " 'tablespoon olive oil',\n", + " 'tablespoon sugar',\n", + " 'tablespoon white',\n", + " 'tablespoon white wine',\n", + " 'tablespoon white wine vinegar',\n", + " 'taste',\n", + " 'teaspoon',\n", + " 'teaspoon baking',\n", + " 'teaspoon dry',\n", + " 'teaspoon grate',\n", + " 'teaspoon grate lemon',\n", + " 'teaspoon ground',\n", + " 'teaspoon ground cumin',\n", + " 'teaspoon salt',\n", + " 'teaspoon vanilla',\n", + " 'teaspoon vanilla extract',\n", + " 'temperature',\n", + " 'thick',\n", + " 'thick slice',\n", + " 'thyme',\n", + " 'to',\n", + " 'to taste',\n", + " 'toast',\n", + " 'tomato',\n", + " 'tomato halve',\n", + " 'trim',\n", + " 'unpeeled',\n", + " 'unsalt',\n", + " 'unsalt butter',\n", + " 'unsweetened',\n", + " 'use',\n", + " 'vanilla',\n", + " 'vanilla extract',\n", + " 'vegetable',\n", + " 'vegetable oil',\n", + " 'vinegar',\n", + " 'virgin',\n", + " 'virgin olive',\n", + " 'virgin olive oil',\n", + " 'water',\n", + " 'wedge',\n", + " 'white',\n", + " 'white vinegar',\n", + " 'white wine',\n", + " 'white wine vinegar',\n", + " 'whole',\n", + " 'wine',\n", + " 'wine vinegar',\n", + " 'yukon',\n", + " 'yukon gold',\n", + " 'yukon gold potato']" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "transformed_recipe.columns.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
dekhedaggregateRatingingredientsprepStepsreviewsCountwillMakeAgainPctingredients_lemmafiedcuisine_namephoto_filenamephoto_creditauthor_namedate_publishedrecipe_url
id
54a4270b19925f464b37c1dcGrilled Hearts of Romaine with Blue Cheese Vin...3.64[1 1/2 cups white wine vinegar, 1/2 cup sugar,...[Combine first 5 ingredients and 1/4 teaspoon ...9100cup white wine vinegar brk cup sugar brk cup w...Missing CuisineEP_12162015_placeholders_casual.jpgPhoto by Chelsea Kyle, Prop Styling by Rhoda B...Kate Higgins2010-12-16 04:00:00+00:00https://www.epicurious.com/recipes/food/views/...
54a42cde19925f464b3809d2Green chiles pickled in soy sauce and vinegar ...Soy-Pickled Jalapeños3.43[3 large fresh jalapeños (4 inches), sliced 1/...[Combine all ingredients in a small heavy sauc...6100large fresh jalapeño inch slice inch thick brk...Missing CuisineEP_12162015_placeholders_rustic.jpgPhoto by Chelsea Kyle, Prop Styling by Anna St...Lillian Chou2009-02-19 04:00:00+00:00https://www.epicurious.com/recipes/food/views/...
54a433036529d92b2c015de3This soup features the flavors of India: aroma...Curried Potato and Spinach Soup with Onion Sal...3.00[4 cups chopped red onions (about 2 large), 1 ...[Combine first 5 ingredients in heavy medium s...667cup chop red onion large brk tablespoon sunflo...Indian234125.jpgBrian LeatartPeter Gordon2006-03-07 04:00:00+00:00https://www.epicurious.com/recipes/food/views/...
54a451926529d92b2c01eda8Chicken Soup3.19[1 pound chicken parts, 2 stalks celery, inclu...[1. Pour 12 cups of cold water into a large st...3287pound chicken part brk stalk celery include le...KosherEP_12162015_placeholders_formal.jpgPhoto by Chelsea Kyle, Prop Styling by Rhoda B...Sharon Lebewohl2004-08-20 04:00:00+00:00https://www.epicurious.com/recipes/food/views/...
54a430876529d92b2c013e2bBrown sugar and molasses are balanced by fresh...Sweet-Hot Barbecue Sauce0.00[2 tablespoons olive oil, 1 cup chopped onion,...[Heat oil in large saucepan over medium-high h...00tablespoon olive oil brk cup chop onion brk cu...Missing CuisineEP_12162015_placeholders_rustic.jpgPhoto by Chelsea Kyle, Prop Styling by Anna St...Suzanne Tracht2007-12-03 20:11:11+00:00https://www.epicurious.com/recipes/food/views/...
\n", + "
" + ], + "text/plain": [ + " dek \\\n", + "id \n", + "54a4270b19925f464b37c1dc \n", + "54a42cde19925f464b3809d2 Green chiles pickled in soy sauce and vinegar ... \n", + "54a433036529d92b2c015de3 This soup features the flavors of India: aroma... \n", + "54a451926529d92b2c01eda8 \n", + "54a430876529d92b2c013e2b Brown sugar and molasses are balanced by fresh... \n", + "\n", + " hed \\\n", + "id \n", + "54a4270b19925f464b37c1dc Grilled Hearts of Romaine with Blue Cheese Vin... \n", + "54a42cde19925f464b3809d2 Soy-Pickled Jalapeños \n", + "54a433036529d92b2c015de3 Curried Potato and Spinach Soup with Onion Sal... \n", + "54a451926529d92b2c01eda8 Chicken Soup \n", + "54a430876529d92b2c013e2b Sweet-Hot Barbecue Sauce \n", + "\n", + " aggregateRating \\\n", + "id \n", + "54a4270b19925f464b37c1dc 3.64 \n", + "54a42cde19925f464b3809d2 3.43 \n", + "54a433036529d92b2c015de3 3.00 \n", + "54a451926529d92b2c01eda8 3.19 \n", + "54a430876529d92b2c013e2b 0.00 \n", + "\n", + " ingredients \\\n", + "id \n", + "54a4270b19925f464b37c1dc [1 1/2 cups white wine vinegar, 1/2 cup sugar,... \n", + "54a42cde19925f464b3809d2 [3 large fresh jalapeños (4 inches), sliced 1/... \n", + "54a433036529d92b2c015de3 [4 cups chopped red onions (about 2 large), 1 ... \n", + "54a451926529d92b2c01eda8 [1 pound chicken parts, 2 stalks celery, inclu... \n", + "54a430876529d92b2c013e2b [2 tablespoons olive oil, 1 cup chopped onion,... \n", + "\n", + " prepSteps \\\n", + "id \n", + "54a4270b19925f464b37c1dc [Combine first 5 ingredients and 1/4 teaspoon ... \n", + "54a42cde19925f464b3809d2 [Combine all ingredients in a small heavy sauc... \n", + "54a433036529d92b2c015de3 [Combine first 5 ingredients in heavy medium s... \n", + "54a451926529d92b2c01eda8 [1. Pour 12 cups of cold water into a large st... \n", + "54a430876529d92b2c013e2b [Heat oil in large saucepan over medium-high h... \n", + "\n", + " reviewsCount willMakeAgainPct \\\n", + "id \n", + "54a4270b19925f464b37c1dc 9 100 \n", + "54a42cde19925f464b3809d2 6 100 \n", + "54a433036529d92b2c015de3 6 67 \n", + "54a451926529d92b2c01eda8 32 87 \n", + "54a430876529d92b2c013e2b 0 0 \n", + "\n", + " ingredients_lemmafied \\\n", + "id \n", + "54a4270b19925f464b37c1dc cup white wine vinegar brk cup sugar brk cup w... \n", + "54a42cde19925f464b3809d2 large fresh jalapeño inch slice inch thick brk... \n", + "54a433036529d92b2c015de3 cup chop red onion large brk tablespoon sunflo... \n", + "54a451926529d92b2c01eda8 pound chicken part brk stalk celery include le... \n", + "54a430876529d92b2c013e2b tablespoon olive oil brk cup chop onion brk cu... \n", + "\n", + " cuisine_name \\\n", + "id \n", + "54a4270b19925f464b37c1dc Missing Cuisine \n", + "54a42cde19925f464b3809d2 Missing Cuisine \n", + "54a433036529d92b2c015de3 Indian \n", + "54a451926529d92b2c01eda8 Kosher \n", + "54a430876529d92b2c013e2b Missing Cuisine \n", + "\n", + " photo_filename \\\n", + "id \n", + "54a4270b19925f464b37c1dc EP_12162015_placeholders_casual.jpg \n", + "54a42cde19925f464b3809d2 EP_12162015_placeholders_rustic.jpg \n", + "54a433036529d92b2c015de3 234125.jpg \n", + "54a451926529d92b2c01eda8 EP_12162015_placeholders_formal.jpg \n", + "54a430876529d92b2c013e2b EP_12162015_placeholders_rustic.jpg \n", + "\n", + " photo_credit \\\n", + "id \n", + "54a4270b19925f464b37c1dc Photo by Chelsea Kyle, Prop Styling by Rhoda B... \n", + "54a42cde19925f464b3809d2 Photo by Chelsea Kyle, Prop Styling by Anna St... \n", + "54a433036529d92b2c015de3 Brian Leatart \n", + "54a451926529d92b2c01eda8 Photo by Chelsea Kyle, Prop Styling by Rhoda B... \n", + "54a430876529d92b2c013e2b Photo by Chelsea Kyle, Prop Styling by Anna St... \n", + "\n", + " author_name date_published \\\n", + "id \n", + "54a4270b19925f464b37c1dc Kate Higgins 2010-12-16 04:00:00+00:00 \n", + "54a42cde19925f464b3809d2 Lillian Chou 2009-02-19 04:00:00+00:00 \n", + "54a433036529d92b2c015de3 Peter Gordon 2006-03-07 04:00:00+00:00 \n", + "54a451926529d92b2c01eda8 Sharon Lebewohl 2004-08-20 04:00:00+00:00 \n", + "54a430876529d92b2c013e2b Suzanne Tracht 2007-12-03 20:11:11+00:00 \n", + "\n", + " recipe_url \n", + "id \n", + "54a4270b19925f464b37c1dc https://www.epicurious.com/recipes/food/views/... \n", + "54a42cde19925f464b3809d2 https://www.epicurious.com/recipes/food/views/... \n", + "54a433036529d92b2c015de3 https://www.epicurious.com/recipes/food/views/... \n", + "54a451926529d92b2c01eda8 https://www.epicurious.com/recipes/food/views/... \n", + "54a430876529d92b2c013e2b https://www.epicurious.com/recipes/food/views/... " + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "to_nlp_df.head()" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# cv_params are parameters for the sklearn CountVectorizer or TFIDFVectorizer\n", - "sklearn_transformer_params = { \n", - " 'analyzer': CustomSKLearnAnalyzer.ngrams_maker(\n", - " min_ngram_length=1,\n", - " max_ngram_length=4\n", - " ),\n", - " 'min_df':3,\n", - " 'binary':False\n", - "}\n", - "\n", - "sklearn_transformer = TfidfVectorizer(**sklearn_transformer_params)\n", + "# Prepare whole dataframe for new processing\n", + "import mlflow\n", + "from mlflow.models import infer_signature\n", + "from src.custom_stanza_mlflow import CustomSKLearnWrapper" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# this function allows us to get the experiment ID from an experiment name\n", + "def get_experiment_id(name):\n", + " exp = mlflow.get_experiment_by_name(name)\n", + " if exp is None:\n", + " exp_id = mlflow.create_experiment(name)\n", + " return exp_id\n", + " return exp.experiment_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Repository initialized!\n",
+       "
\n" + ], + "text/plain": [ + "Repository initialized!\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#@markdown Enter the username of your DAGsHub account:\n", + "DAGSHUB_USER_NAME = \"AaronWChen\" #@param {type:\"string\"}\n", "\n", - "model_input = to_nlp_df['ingredients_lemmafied']\n", + "#@markdown Enter the email for your DAGsHub account:\n", + "DAGSHUB_EMAIL = \"awc33@cornell.edu\" #@param {type:\"string\"}\n", "\n", - "# Do fit transform on data\n", - "print(\"fit_transform start: \" + str(datetime.now()))\n", - "response = sklearn_transformer.fit_transform(tqdm(model_input)) \n", - "print(\"fit_transform end: \" + str(datetime.now()))\n", + "#@markdown Enter the repo name \n", + "DAGSHUB_REPO_NAME = \"MeaLeon\"\n", "\n", - "transformed_recipe = pd.DataFrame(\n", - " response.toarray(),\n", - " columns=sklearn_transformer.get_feature_names_out(),\n", - " index=model_input.index\n", - ")\n", - "\n", - "print(transformed_recipe.columns)" + "#@markdown Enter the name of the branch you are working on \n", + "BRANCH = \"NGRAM-1/try-llm-code-speedup\"\n", + "dagshub.init(repo_name=DAGSHUB_REPO_NAME\n", + " , repo_owner=DAGSHUB_USER_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Starting DEV stage for TFIDF Encoded model" ] }, { @@ -218,31 +2815,326 @@ "metadata": {}, "outputs": [], "source": [ - "transformed_recipe" + "mlflow.set_tracking_uri(f'https://dagshub.com/{DAGSHUB_USER_NAME}/MeaLeon.mlflow')\n", + "\n", + "# starter idea for making an experiment name can be the git branch, but need more specificity\n", + "experiment_name = f\"{DAGSHUB_EMAIL}/OHE_up_to_quadgrams\"\n", + "mlflow_exp_id = get_experiment_id(experiment_name)\n", + "\n", + "# define model location\n", + "# model_directory = \"/tmp/sklearn_model\"\n", + "model_directory = \"../models/sklearn_model\"\n", + "\n", + "# Define the required artifacts associated with the saved custom pyfunc\n", + "# sklearn_path = model_directory + \"\"\n", + "sklearn_model_path = model_directory + \"/python_model.pkl\"\n", + "sklearn_transformer_path = model_directory + \"/sklearn_transformer.pkl\"\n", + "transformed_recipes_path = model_directory + \"/transformed_recipes.pkl\"\n", + "\n", + "artifacts = {'sklearn_model': sklearn_model_path,\n", + " 'sklearn_transformer': sklearn_transformer_path,\n", + " 'transformed_recipes': transformed_recipes_path\n", + " }\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Preprocess start: 2024-03-20 07:38:00.336408\n", + "Preprocess end: 2024-03-21 01:19:33.081564\n", + "\n", + "\n", + "--------------\n", + "Preprocessed Dataframe:\n", + " dek \\\n", + "id \n", + "54a2b6b019925f464b373351 How does fried chicken achieve No. 1 status? B... \n", + "54a408a019925f464b3733bc Spinaci all'Ebraica \n", + "54a408a26529d92b2c003631 This majestic, moist, and richly spiced honey ... \n", + "54a408a66529d92b2c003638 The idea for this sandwich came to me when my ... \n", + "54a408a719925f464b3733cc In 1930, Simon Agranat, the chief justice of t... \n", + "\n", + " hed \\\n", + "id \n", + "54a2b6b019925f464b373351 Pickle-Brined Fried Chicken \n", + "54a408a019925f464b3733bc Spinach Jewish Style \n", + "54a408a26529d92b2c003631 New Year’s Honey Cake \n", + "54a408a66529d92b2c003638 The B.L.A.—Bagel with Lox and Avocado \n", + "54a408a719925f464b3733cc Shakshuka a la Doktor Shakshuka \n", + "\n", + " aggregateRating \\\n", + "id \n", + "54a2b6b019925f464b373351 3.11 \n", + "54a408a019925f464b3733bc 3.22 \n", + "54a408a26529d92b2c003631 3.62 \n", + "54a408a66529d92b2c003638 4.00 \n", + "54a408a719925f464b3733cc 2.71 \n", + "\n", + " ingredients \\\n", + "id \n", + "54a2b6b019925f464b373351 [1 tablespoons yellow mustard seeds, 1 tablesp... \n", + "54a408a019925f464b3733bc [3 pounds small-leaved bulk spinach, Salt, 1/2... \n", + "54a408a26529d92b2c003631 [3 1/2 cups all-purpose flour, 1 tablespoon ba... \n", + "54a408a66529d92b2c003638 [1 small ripe avocado, preferably Hass (see No... \n", + "54a408a719925f464b3733cc [2 pounds fresh tomatoes, unpeeled and cut in ... \n", + "\n", + " prepSteps \\\n", + "id \n", + "54a2b6b019925f464b373351 [Toast mustard and coriander seeds in a dry me... \n", + "54a408a019925f464b3733bc [Remove the stems and roots from the spinach. ... \n", + "54a408a26529d92b2c003631 [I like this cake best baked in a 9-inch angel... \n", + "54a408a66529d92b2c003638 [A short time before serving, mash avocado and... \n", + "54a408a719925f464b3733cc [1. Place the tomatoes, garlic, salt, paprika,... \n", + "\n", + " reviewsCount willMakeAgainPct \\\n", + "id \n", + "54a2b6b019925f464b373351 7 100 \n", + "54a408a019925f464b3733bc 5 80 \n", + "54a408a26529d92b2c003631 105 88 \n", + "54a408a66529d92b2c003638 7 100 \n", + "54a408a719925f464b3733cc 7 83 \n", + "\n", + " ingredients_lemmafied \\\n", + "id \n", + "54a2b6b019925f464b373351 tablespoon yellow mustard seed brk tablespoon ... \n", + "54a408a019925f464b3733bc pound small leave bulk spinach brk salt brk cu... \n", + "54a408a26529d92b2c003631 cup purpose flour brk tablespoon baking powder... \n", + "54a408a66529d92b2c003638 small ripe avocado hass see note brk teaspoon ... \n", + "54a408a719925f464b3733cc pound fresh tomato unpeeled cut quarter ounce ... \n", + "\n", + " cuisine_name \\\n", + "id \n", + "54a2b6b019925f464b373351 Missing Cuisine \n", + "54a408a019925f464b3733bc Italian \n", + "54a408a26529d92b2c003631 Kosher \n", + "54a408a66529d92b2c003638 Kosher \n", + "54a408a719925f464b3733cc Kosher \n", + "\n", + " photo_filename \\\n", + "id \n", + "54a2b6b019925f464b373351 51247610_fried-chicken_1x1.jpg \n", + "54a408a019925f464b3733bc EP_12162015_placeholders_rustic.jpg \n", + "54a408a26529d92b2c003631 EP_09022015_honeycake-2.jpg \n", + "54a408a66529d92b2c003638 EP_12162015_placeholders_casual.jpg \n", + "54a408a719925f464b3733cc EP_12162015_placeholders_formal.jpg \n", + "\n", + " photo_credit \\\n", + "id \n", + "54a2b6b019925f464b373351 Michael Graydon and Nikole Herriott \n", + "54a408a019925f464b3733bc Photo by Chelsea Kyle, Prop Styling by Anna St... \n", + "54a408a26529d92b2c003631 Photo by Chelsea Kyle, Food Styling by Anna St... \n", + "54a408a66529d92b2c003638 Photo by Chelsea Kyle, Prop Styling by Rhoda B... \n", + "54a408a719925f464b3733cc Photo by Chelsea Kyle, Prop Styling by Rhoda B... \n", + "\n", + " author_name date_published \\\n", + "id \n", + "54a2b6b019925f464b373351 Missing Author Name 2014-08-19 04:00:00+00:00 \n", + "54a408a019925f464b3733bc Edda Servi Machlin 2008-09-09 04:00:00+00:00 \n", + "54a408a26529d92b2c003631 Marcy Goldman 2008-09-10 04:00:00+00:00 \n", + "54a408a66529d92b2c003638 Faye Levy 2008-09-08 04:00:00+00:00 \n", + "54a408a719925f464b3733cc Joan Nathan 2008-09-09 04:00:00+00:00 \n", + "\n", + " recipe_url \n", + "id \n", + "54a2b6b019925f464b373351 https://www.epicurious.com/recipes/food/views/... \n", + "54a408a019925f464b3733bc https://www.epicurious.com/recipes/food/views/... \n", + "54a408a26529d92b2c003631 https://www.epicurious.com/recipes/food/views/... \n", + "54a408a66529d92b2c003638 https://www.epicurious.com/recipes/food/views/... \n", + "54a408a719925f464b3733cc https://www.epicurious.com/recipes/food/views/... \n", + "(34756, 14)\n" + ] + } + ], + "source": [ + "# pre_proc_df is cleaned dataframe\n", + "print(\"Preprocess start: \" + str(datetime.now()))\n", + "whole_nlp_df = dfpp.preprocess_dataframe(raw_df)\n", + "print(\"Preprocess end: \" + str(datetime.now()))\n", + "print('\\n')\n", + "print('--------------')\n", + "print('Preprocessed Dataframe:', end='\\n')\n", + "print(whole_nlp_df.head())\n", + "print(whole_nlp_df.shape)" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "sklearn fit transform on ingredients:\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "Input Data: \n", + "id\n", + "54a2b6b019925f464b373351 tablespoon yellow mustard seed brk tablespoon ...\n", + "54a408a019925f464b3733bc pound small leave bulk spinach brk salt brk cu...\n", + "54a408a26529d92b2c003631 cup purpose flour brk tablespoon baking powder...\n", + "54a408a66529d92b2c003638 small ripe avocado hass see note brk teaspoon ...\n", + "54a408a719925f464b3733cc pound fresh tomato unpeeled cut quarter ounce ...\n", + " ... \n", + "59541a31bff3052847ae2107 tablespoon unsalt butter room temperature brk ...\n", + "5954233ad52ca90dc28200e7 tablespoon stick salt butter room temperature ...\n", + "595424c2109c972493636f83 tablespoon unsalted butter more greasing pan b...\n", + "5956638625dc3d1d829b7166 coarse salt brk lime wedge brk ounce tomato ju...\n", + "59566daa25dc3d1d829b7169 bottle millileter sour beer such almanac citra...\n", + "Name: ingredients_lemmafied, Length: 34756, dtype: object\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "Input Data Shape: \n", + "(34756,)\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "Random 3 Records from Input Data: \n", + "id\n", + "54a40caa19925f464b374017 boneless muscovy duck breast half pound total ...\n", + "55d4e08063b1ba1b5534b198 tablespoon white wine vinegar brk teaspoon sug...\n", + "54a43ad16529d92b2c019fc3 cup basmati rice ounce brk cup sweeten flake c...\n", + "Name: ingredients_lemmafied, dtype: object\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 34756/34756 [00:03<00:00, 11131.57it/s]\n", + "/home/awchen/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/models/signature.py:213: UserWarning: Hint: Inferred schema contains integer column(s). Integer columns in Python cannot represent missing values. If your input data contains missing values at inference time, it will be encoded as floats and will cause a schema enforcement error. The best way to avoid this problem is to infer the model schema based on a realistic data sample (training dataset) that includes missing values. Alternatively, you can declare integer columns as doubles (float64) whenever these columns may have missing values. See `Handling Integers With Missing Values `_ for more details.\n", + " outputs = _infer_schema(model_output) if model_output is not None else None\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "Transformed Data:\n", + " 100g 125g 13x9x2 150g 1pound 1tablespoon \\\n", + "id \n", + "54a2b6b019925f464b373351 0 0 0 0 0 0 \n", + "54a408a019925f464b3733bc 0 0 0 0 0 0 \n", + "54a408a26529d92b2c003631 0 0 0 0 0 0 \n", + "54a408a66529d92b2c003638 0 0 0 0 0 0 \n", + "54a408a719925f464b3733cc 0 0 0 0 0 0 \n", + "... ... ... ... ... ... ... \n", + "59541a31bff3052847ae2107 0 0 0 0 0 0 \n", + "5954233ad52ca90dc28200e7 0 0 0 0 0 0 \n", + "595424c2109c972493636f83 0 0 0 0 0 0 \n", + "5956638625dc3d1d829b7166 0 0 0 0 0 0 \n", + "59566daa25dc3d1d829b7169 0 0 0 0 0 0 \n", + "\n", + " 1teaspoon 200g 250g 2cup ... árbol divide \\\n", + "id ... \n", + "54a2b6b019925f464b373351 0 0 0 0 ... 0 \n", + "54a408a019925f464b3733bc 0 0 0 0 ... 0 \n", + "54a408a26529d92b2c003631 0 0 0 0 ... 0 \n", + "54a408a66529d92b2c003638 0 0 0 0 ... 0 \n", + "54a408a719925f464b3733cc 0 0 0 0 ... 0 \n", + "... ... ... ... ... ... ... \n", + "59541a31bff3052847ae2107 0 0 0 0 ... 0 \n", + "5954233ad52ca90dc28200e7 0 0 0 0 ... 0 \n", + "595424c2109c972493636f83 0 0 0 0 ... 0 \n", + "5956638625dc3d1d829b7166 0 0 0 0 ... 0 \n", + "59566daa25dc3d1d829b7169 0 0 0 0 ... 0 \n", + "\n", + " árbol seed árbol seed remove árbol stem \\\n", + "id \n", + "54a2b6b019925f464b373351 0 0 0 \n", + "54a408a019925f464b3733bc 0 0 0 \n", + "54a408a26529d92b2c003631 0 0 0 \n", + "54a408a66529d92b2c003638 0 0 0 \n", + "54a408a719925f464b3733cc 0 0 0 \n", + "... ... ... ... \n", + "59541a31bff3052847ae2107 0 0 0 \n", + "5954233ad52ca90dc28200e7 0 0 0 \n", + "595424c2109c972493636f83 0 0 0 \n", + "5956638625dc3d1d829b7166 0 0 0 \n", + "59566daa25dc3d1d829b7169 0 0 0 \n", + "\n", + " árbol teaspoon árbol teaspoon crush \\\n", + "id \n", + "54a2b6b019925f464b373351 0 0 \n", + "54a408a019925f464b3733bc 0 0 \n", + "54a408a26529d92b2c003631 0 0 \n", + "54a408a66529d92b2c003638 0 0 \n", + "54a408a719925f464b3733cc 0 0 \n", + "... ... ... \n", + "59541a31bff3052847ae2107 0 0 \n", + "5954233ad52ca90dc28200e7 0 0 \n", + "595424c2109c972493636f83 0 0 \n", + "5956638625dc3d1d829b7166 0 0 \n", + "59566daa25dc3d1d829b7169 0 0 \n", + "\n", + " árbol teaspoon crush red árbol wipe \\\n", + "id \n", + "54a2b6b019925f464b373351 0 0 \n", + "54a408a019925f464b3733bc 0 0 \n", + "54a408a26529d92b2c003631 0 0 \n", + "54a408a66529d92b2c003638 0 0 \n", + "54a408a719925f464b3733cc 0 0 \n", + "... ... ... \n", + "59541a31bff3052847ae2107 0 0 \n", + "5954233ad52ca90dc28200e7 0 0 \n", + "595424c2109c972493636f83 0 0 \n", + "5956638625dc3d1d829b7166 0 0 \n", + "59566daa25dc3d1d829b7169 0 0 \n", + "\n", + " árbol wipe clean épice \n", + "id \n", + "54a2b6b019925f464b373351 0 0 \n", + "54a408a019925f464b3733bc 0 0 \n", + "54a408a26529d92b2c003631 0 0 \n", + "54a408a66529d92b2c003638 0 0 \n", + "54a408a719925f464b3733cc 0 0 \n", + "... ... ... \n", + "59541a31bff3052847ae2107 0 0 \n", + "5954233ad52ca90dc28200e7 0 0 \n", + "595424c2109c972493636f83 0 0 \n", + "5956638625dc3d1d829b7166 0 0 \n", + "59566daa25dc3d1d829b7169 0 0 \n", + "\n", + "[34756 rows x 78378 columns]\n" + ] + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", + "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", + "\u001b[1;31mClick here for more info. \n", + "\u001b[1;31mView Jupyter log for further details." + ] + } + ], "source": [ "# load from MLflow\n", "mlflow_client = mlflow.tracking.MlflowClient(\n", " tracking_uri=f'https://dagshub.com/{DAGSHUB_USER_NAME}/MeaLeon.mlflow')\n", "\n", "# cv_params are parameters for the sklearn CountVectorizer or TFIDFVectorizer\n", - "sklearn_transformer_params = {\n", - " 'analyzer': CustomSKLearnAnalyzer.ngrams_maker(\n", + "sklearn_transformer_params = { \n", + " 'analyzer': CustomSKLearnAnalyzer().ngram_maker(\n", " min_ngram_length=1,\n", - " max_ngram_length=4\n", + " max_ngram_length=4,\n", " ),\n", " 'min_df':3,\n", " 'binary':True\n", @@ -251,15 +3143,11 @@ "# pipeline_params are parameters that will be logged in MLFlow and are a superset of library parameters\n", "pipeline_params = {\n", " 'stanza_model': 'en',\n", - " 'sklearn-transformer': 'Tf'\n", + " 'sklearn-transformer': 'OHE'\n", "}\n", "\n", "# update the pipeline parameters with the library-specific ones so that they show up in MLflow Tracking\n", "pipeline_params.update(sklearn_transformer_params)\n", - "# pipeline_params.update(bertopic_params)\n", - "\n", - "# signature = infer_signature(model_input=to_nlp_df['ingredients'],\n", - "# )\n", "\n", "with mlflow.start_run(experiment_id=mlflow_exp_id): \n", " # LOG PARAMETERS\n", @@ -277,7 +3165,7 @@ " print('-' * 80)\n", " print('sklearn fit transform on ingredients:', end='\\n')\n", "\n", - " model_input = to_nlp_df['ingredients']\n", + " model_input = whole_nlp_df['ingredients_lemmafied']\n", "\n", " print('\\n')\n", " print('-' * 80)\n", @@ -336,17 +3224,60 @@ " model_info = mlflow.pyfunc.log_model( \n", " code_path=[\"../src/\"],\n", " python_model=CustomSKLearnWrapper(),\n", - " input_example=to_nlp_df['ingredients'][0],\n", + " input_example=whole_nlp_df['ingredients_lemmafied'][0],\n", " signature=signature, \n", " artifact_path=\"sklearn_model\",\n", " artifacts=artifacts\n", " ) \n", "\n", " # since this uses a custom Stanza analyzer, we have to use a custom mlflow.Pyfunc.PythonModel\n", - " \n", " " ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# pre_proc_df is cleaned dataframe\n", + "whole_nlp_df = dfpp.preprocess_dataframe(raw_df)\n", + "print('\\n')\n", + "print('--------------')\n", + "print('Preprocessed Dataframe:', end='\\n')\n", + "print(whole_nlp_df.head())\n", + "print(whole_nlp_df.shape)\n", + "\n", + "# cv_params are parameters for the sklearn CountVectorizer or TFIDFVectorizer\n", + "sklearn_transformer_params = { \n", + " 'analyzer': CustomSKLearnAnalyzer().ngram_maker(\n", + " min_ngram_length=1,\n", + " max_ngram_length=4,\n", + " ),\n", + " 'min_df':3,\n", + "}\n", + "\n", + "sklearn_transformer = TfidfVectorizer(**sklearn_transformer_params)\n", + "\n", + "model_input = whole_nlp_df['ingredients_lemmafied']\n", + "\n", + "# Do fit transform on data\n", + "print(\"fit_transform start: \" + str(datetime.now()))\n", + "response = sklearn_transformer.fit_transform(tqdm(model_input)) \n", + "print(\"fit_transform end: \" + str(datetime.now()))\n", + "\n", + "transformed_recipe = pd.DataFrame(\n", + " response.toarray(),\n", + " columns=sklearn_transformer.get_feature_names_out(),\n", + " index=model_input.index\n", + ")\n", + "\n", + "combined_df = pd.concat([transformed_recipe, whole_nlp_df], axis=1)\n", + "\n", + "with open(\"../joblib/2024.03.19/\", 'wb') as fo:\n", + " joblib.dump()\n" + ] + }, { "cell_type": "code", "execution_count": null,