diff --git a/nbs/13_new_preproc_bge_test.ipynb b/nbs/13_new_preproc_bge_test.ipynb index 0babf9d..4c7cc6c 100644 --- a/nbs/13_new_preproc_bge_test.ipynb +++ b/nbs/13_new_preproc_bge_test.ipynb @@ -24,7 +24,74 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "166c4c58fd4141b6bfbb32eb86fb379c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 0%| …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-03-20 07:34:33 INFO: Downloading default packages for language: en (English) ...\n", + "2024-03-20 07:34:33 INFO: File exists: /home/awchen/stanza_resources/en/default.zip\n", + "2024-03-20 07:34:36 INFO: Finished downloading models and saved to /home/awchen/stanza_resources.\n", + "2024-03-20 07:34:36 INFO: Checking for updates to resources.json in case models have been updated. Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "380b40c3a490494cbc23c9d71496ab16", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 0%| …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-03-20 07:34:37 INFO: Loading these models for language: en (English):\n", + "======================================\n", + "| Processor | Package |\n", + "--------------------------------------\n", + "| tokenize | combined |\n", + "| pos | combined_charlm |\n", + "| lemma | combined_nocharlm |\n", + "| constituency | ptb3-revised_charlm |\n", + "| depparse | combined_charlm |\n", + "| sentiment | sstplus |\n", + "| ner | ontonotes_charlm |\n", + "======================================\n", + "\n", + "2024-03-20 07:34:37 INFO: Using device: cpu\n", + "2024-03-20 07:34:37 INFO: Loading: tokenize\n", + "2024-03-20 07:34:37 INFO: Loading: pos\n", + "2024-03-20 07:34:38 INFO: Loading: lemma\n", + "2024-03-20 07:34:38 INFO: Loading: constituency\n", + "2024-03-20 07:34:38 INFO: Loading: depparse\n", + "2024-03-20 07:34:38 INFO: Loading: sentiment\n", + "2024-03-20 07:34:38 INFO: Loading: ner\n", + "2024-03-20 07:34:39 INFO: Done loading processors!\n" + ] + } + ], "source": [ "# | hide\n", "# from bertopic import BERTopic\n", @@ -36,8 +103,6 @@ "# from hdbscan import HDBSCAN\n", "from itertools import tee, islice, product\n", "import joblib\n", - "# import mlflow\n", - "# from mlflow.models import infer_signature\n", "import nbdev\n", "from nbdev.showdoc import *\n", "import pandas as pd\n", @@ -52,7 +117,6 @@ "from sklearn.model_selection import train_test_split\n", "from sklearn.pipeline import make_pipeline\n", "from src.custom_sklearn_text_transformer_mlflow import CustomSKLearnAnalyzer\n", - "# from src.custom_stanza_mlflow import CustomSKLearnWrapper\n", "import src.dataframe_preprocessor as dfpp\n", "import stanza\n", "from tqdm import tqdm\n", @@ -96,7 +160,154 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c622bb1b2718469497a488bcfeca738e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 0%| …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-03-20 07:34:39 INFO: Downloading default packages for language: en (English) ...\n", + "2024-03-20 07:34:40 INFO: File exists: /home/awchen/stanza_resources/en/default.zip\n", + "2024-03-20 07:34:43 INFO: Finished downloading models and saved to /home/awchen/stanza_resources.\n", + "2024-03-20 07:34:43 INFO: Checking for updates to resources.json in case models have been updated. Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c4d6af04d98f489ea63edb2d0dbb8a88", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json: 0%| …" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-03-20 07:34:44 INFO: Loading these models for language: en (English):\n", + "======================================\n", + "| Processor | Package |\n", + "--------------------------------------\n", + "| tokenize | combined |\n", + "| pos | combined_charlm |\n", + "| lemma | combined_nocharlm |\n", + "| constituency | ptb3-revised_charlm |\n", + "| depparse | combined_charlm |\n", + "| sentiment | sstplus |\n", + "| ner | ontonotes_charlm |\n", + "======================================\n", + "\n", + "2024-03-20 07:34:44 INFO: Using device: cuda\n", + "2024-03-20 07:34:44 INFO: Loading: tokenize\n", + "2024-03-20 07:34:47 INFO: Loading: pos\n", + "2024-03-20 07:34:47 INFO: Loading: lemma\n", + "2024-03-20 07:34:47 INFO: Loading: constituency\n", + "2024-03-20 07:34:47 INFO: Loading: depparse\n", + "2024-03-20 07:34:47 INFO: Loading: sentiment\n", + "2024-03-20 07:34:48 INFO: Loading: ner\n", + "2024-03-20 07:34:48 INFO: Done loading processors!\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--------------\n", + "Raw Dataframe:\n", + " id \\\n", + "0 54a2b6b019925f464b373351 \n", + "1 54a408a019925f464b3733bc \n", + "2 54a408a26529d92b2c003631 \n", + "3 54a408a66529d92b2c003638 \n", + "4 54a408a719925f464b3733cc \n", + "\n", + " dek \\\n", + "0 How does fried chicken achieve No. 1 status? B... \n", + "1 Spinaci all'Ebraica \n", + "2 This majestic, moist, and richly spiced honey ... \n", + "3 The idea for this sandwich came to me when my ... \n", + "4 In 1930, Simon Agranat, the chief justice of t... \n", + "\n", + " hed pubDate \\\n", + "0 Pickle-Brined Fried Chicken 2014-08-19T04:00:00.000Z \n", + "1 Spinach Jewish Style 2008-09-09T04:00:00.000Z \n", + "2 New Year’s Honey Cake 2008-09-10T04:00:00.000Z \n", + "3 The B.L.A.Bagel with Lox and Avocado 2008-09-08T04:00:00.000Z \n", + "4 Shakshuka a la Doktor Shakshuka 2008-09-09T04:00:00.000Z \n", + "\n", + " author type \\\n", + "0 [] recipe \n", + "1 [{'name': 'Edda Servi Machlin'}] recipe \n", + "2 [{'name': 'Marcy Goldman'}] recipe \n", + "3 [{'name': 'Faye Levy'}] recipe \n", + "4 [{'name': 'Joan Nathan'}] recipe \n", + "\n", + " url \\\n", + "0 /recipes/food/views/pickle-brined-fried-chicke... \n", + "1 /recipes/food/views/spinach-jewish-style-350152 \n", + "2 /recipes/food/views/majestic-and-moist-new-yea... \n", + "3 /recipes/food/views/the-b-l-a-bagel-with-lox-a... \n", + "4 /recipes/food/views/shakshuka-a-la-doktor-shak... \n", + "\n", + " photoData \\\n", + "0 {'id': '54a2b64a6529d92b2c003409', 'filename':... \n", + "1 {'id': '56746182accb4c9831e45e0a', 'filename':... \n", + "2 {'id': '55e85ba4cf90d6663f728014', 'filename':... \n", + "3 {'id': '5674617e47d1a28026045e4f', 'filename':... \n", + "4 {'id': '56746183b47c050a284a4e15', 'filename':... \n", + "\n", + " tag aggregateRating \\\n", + "0 {'category': 'ingredient', 'name': 'Chicken', ... 3.11 \n", + "1 {'category': 'cuisine', 'name': 'Italian', 'ur... 3.22 \n", + "2 {'category': 'cuisine', 'name': 'Jewish', 'url... 3.62 \n", + "3 {'category': 'cuisine', 'name': 'Jewish', 'url... 4.00 \n", + "4 {'category': 'cuisine', 'name': 'Jewish', 'url... 2.71 \n", + "\n", + " ingredients \\\n", + "0 [1 tablespoons yellow mustard seeds, 1 tablesp... \n", + "1 [3 pounds small-leaved bulk spinach, Salt, 1/2... \n", + "2 [3 1/2 cups all-purpose flour, 1 tablespoon ba... \n", + "3 [1 small ripe avocado, preferably Hass (see No... \n", + "4 [2 pounds fresh tomatoes, unpeeled and cut in ... \n", + "\n", + " prepSteps reviewsCount \\\n", + "0 [Toast mustard and coriander seeds in a dry me... 7 \n", + "1 [Remove the stems and roots from the spinach. ... 5 \n", + "2 [I like this cake best baked in a 9-inch angel... 105 \n", + "3 [A short time before serving, mash avocado and... 7 \n", + "4 [1. Place the tomatoes, garlic, salt, paprika,... 7 \n", + "\n", + " willMakeAgainPct dateCrawled \n", + "0 100 1498547035 \n", + "1 80 1498547740 \n", + "2 88 1498547738 \n", + "3 100 1498547740 \n", + "4 83 1498547740 \n", + "(34756, 15)\n" + ] + } + ], "source": [ "# instantiate stanza pipeline\n", "stanza.download('en')\n", @@ -124,7 +335,114 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--------------\n", + "Preprocessed Dataframe:\n", + " dek \\\n", + "id \n", + "54a4270b19925f464b37c1dc \n", + "54a42cde19925f464b3809d2 Green chiles pickled in soy sauce and vinegar ... \n", + "54a433036529d92b2c015de3 This soup features the flavors of India: aroma... \n", + "54a451926529d92b2c01eda8 \n", + "54a430876529d92b2c013e2b Brown sugar and molasses are balanced by fresh... \n", + "\n", + " hed \\\n", + "id \n", + "54a4270b19925f464b37c1dc Grilled Hearts of Romaine with Blue Cheese Vin... \n", + "54a42cde19925f464b3809d2 Soy-Pickled Jalapeños \n", + "54a433036529d92b2c015de3 Curried Potato and Spinach Soup with Onion Sal... \n", + "54a451926529d92b2c01eda8 Chicken Soup \n", + "54a430876529d92b2c013e2b Sweet-Hot Barbecue Sauce \n", + "\n", + " aggregateRating \\\n", + "id \n", + "54a4270b19925f464b37c1dc 3.64 \n", + "54a42cde19925f464b3809d2 3.43 \n", + "54a433036529d92b2c015de3 3.00 \n", + "54a451926529d92b2c01eda8 3.19 \n", + "54a430876529d92b2c013e2b 0.00 \n", + "\n", + " ingredients \\\n", + "id \n", + "54a4270b19925f464b37c1dc [1 1/2 cups white wine vinegar, 1/2 cup sugar,... \n", + "54a42cde19925f464b3809d2 [3 large fresh jalapeños (4 inches), sliced 1/... \n", + "54a433036529d92b2c015de3 [4 cups chopped red onions (about 2 large), 1 ... \n", + "54a451926529d92b2c01eda8 [1 pound chicken parts, 2 stalks celery, inclu... \n", + "54a430876529d92b2c013e2b [2 tablespoons olive oil, 1 cup chopped onion,... \n", + "\n", + " prepSteps \\\n", + "id \n", + "54a4270b19925f464b37c1dc [Combine first 5 ingredients and 1/4 teaspoon ... \n", + "54a42cde19925f464b3809d2 [Combine all ingredients in a small heavy sauc... \n", + "54a433036529d92b2c015de3 [Combine first 5 ingredients in heavy medium s... \n", + "54a451926529d92b2c01eda8 [1. Pour 12 cups of cold water into a large st... \n", + "54a430876529d92b2c013e2b [Heat oil in large saucepan over medium-high h... \n", + "\n", + " reviewsCount willMakeAgainPct \\\n", + "id \n", + "54a4270b19925f464b37c1dc 9 100 \n", + "54a42cde19925f464b3809d2 6 100 \n", + "54a433036529d92b2c015de3 6 67 \n", + "54a451926529d92b2c01eda8 32 87 \n", + "54a430876529d92b2c013e2b 0 0 \n", + "\n", + " ingredients_lemmafied \\\n", + "id \n", + "54a4270b19925f464b37c1dc cup white wine vinegar brk cup sugar brk cup w... \n", + "54a42cde19925f464b3809d2 large fresh jalapeño inch slice inch thick brk... \n", + "54a433036529d92b2c015de3 cup chop red onion large brk tablespoon sunflo... \n", + "54a451926529d92b2c01eda8 pound chicken part brk stalk celery include le... \n", + "54a430876529d92b2c013e2b tablespoon olive oil brk cup chop onion brk cu... \n", + "\n", + " cuisine_name \\\n", + "id \n", + "54a4270b19925f464b37c1dc Missing Cuisine \n", + "54a42cde19925f464b3809d2 Missing Cuisine \n", + "54a433036529d92b2c015de3 Indian \n", + "54a451926529d92b2c01eda8 Kosher \n", + "54a430876529d92b2c013e2b Missing Cuisine \n", + "\n", + " photo_filename \\\n", + "id \n", + "54a4270b19925f464b37c1dc EP_12162015_placeholders_casual.jpg \n", + "54a42cde19925f464b3809d2 EP_12162015_placeholders_rustic.jpg \n", + "54a433036529d92b2c015de3 234125.jpg \n", + "54a451926529d92b2c01eda8 EP_12162015_placeholders_formal.jpg \n", + "54a430876529d92b2c013e2b EP_12162015_placeholders_rustic.jpg \n", + "\n", + " photo_credit \\\n", + "id \n", + "54a4270b19925f464b37c1dc Photo by Chelsea Kyle, Prop Styling by Rhoda B... \n", + "54a42cde19925f464b3809d2 Photo by Chelsea Kyle, Prop Styling by Anna St... \n", + "54a433036529d92b2c015de3 Brian Leatart \n", + "54a451926529d92b2c01eda8 Photo by Chelsea Kyle, Prop Styling by Rhoda B... \n", + "54a430876529d92b2c013e2b Photo by Chelsea Kyle, Prop Styling by Anna St... \n", + "\n", + " author_name date_published \\\n", + "id \n", + "54a4270b19925f464b37c1dc Kate Higgins 2010-12-16 04:00:00+00:00 \n", + "54a42cde19925f464b3809d2 Lillian Chou 2009-02-19 04:00:00+00:00 \n", + "54a433036529d92b2c015de3 Peter Gordon 2006-03-07 04:00:00+00:00 \n", + "54a451926529d92b2c01eda8 Sharon Lebewohl 2004-08-20 04:00:00+00:00 \n", + "54a430876529d92b2c013e2b Suzanne Tracht 2007-12-03 20:11:11+00:00 \n", + "\n", + " recipe_url \n", + "id \n", + "54a4270b19925f464b37c1dc https://www.epicurious.com/recipes/food/views/... \n", + "54a42cde19925f464b3809d2 https://www.epicurious.com/recipes/food/views/... \n", + "54a433036529d92b2c015de3 https://www.epicurious.com/recipes/food/views/... \n", + "54a451926529d92b2c01eda8 https://www.epicurious.com/recipes/food/views/... \n", + "54a430876529d92b2c013e2b https://www.epicurious.com/recipes/food/views/... \n", + "(50, 14)\n" + ] + } + ], "source": [ "# take sample and train/test split \n", "subset_df = raw_df.sample(n=100, random_state=45)\n", @@ -143,30 +461,54 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fit_transform start: 2024-03-20 07:36:09.748134\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 50/50 [00:00<00:00, 8744.69it/s]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "fit_transform end: 2024-03-20 07:36:09.766620\n", + "Index(['English', 'English hothouse', 'English hothouse cucumber', 'available',\n", + " 'baby', 'baking', 'baking powder', 'bay', 'bay leave', 'beef',\n", + " ...\n", + " 'white', 'white vinegar', 'white wine', 'white wine vinegar', 'whole',\n", + " 'wine', 'wine vinegar', 'yukon', 'yukon gold', 'yukon gold potato'],\n", + " dtype='object', length=283)\n" + ] + } + ], "source": [ "# cv_params are parameters for the sklearn CountVectorizer or TFIDFVectorizer\n", - "sklearn_transformer_params = {\n", - " 'analyzer': 'word',\n", - " 'ngram_range': (1,4),\n", + "sklearn_transformer_params = { \n", + " 'analyzer': CustomSKLearnAnalyzer().ngram_maker(\n", + " min_ngram_length=1,\n", + " max_ngram_length=4,\n", + " ),\n", " 'min_df':3,\n", - " 'binary':False\n", + " # 'binary':False\n", "}\n", "\n", "sklearn_transformer = TfidfVectorizer(**sklearn_transformer_params)\n", "\n", - "# print('\\n')\n", - "# print('-' * 80)\n", - "# print('sklearn fit transform on ingredients:', end='\\n')\n", - "\n", - "model_input = to_nlp_df['ingredients'].apply(\" \".join).str.lower()\n", - "\n", - "print(\"sklearn fit transform start: \" + str(datetime.now()))\n", + "model_input = to_nlp_df['ingredients_lemmafied']\n", "\n", "# Do fit transform on data\n", + "print(\"fit_transform start: \" + str(datetime.now()))\n", "response = sklearn_transformer.fit_transform(tqdm(model_input)) \n", - "\n", - "print(\"sklearn fit transform end: \" + str(datetime.now()))\n", + "print(\"fit_transform end: \" + str(datetime.now()))\n", "\n", "transformed_recipe = pd.DataFrame(\n", " response.toarray(),\n", @@ -174,42 +516,2297 @@ " index=model_input.index\n", ")\n", "\n", - "print(transformed_recipe)\n", "print(transformed_recipe.columns)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " | English | \n", + "English hothouse | \n", + "English hothouse cucumber | \n", + "available | \n", + "baby | \n", + "baking | \n", + "baking powder | \n", + "bay | \n", + "bay leave | \n", + "beef | \n", + "... | \n", + "white | \n", + "white vinegar | \n", + "white wine | \n", + "white wine vinegar | \n", + "whole | \n", + "wine | \n", + "wine vinegar | \n", + "yukon | \n", + "yukon gold | \n", + "yukon gold potato | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " |
54a4270b19925f464b37c1dc | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.149995 | \n", + "0.149995 | \n", + "0.000000 | \n", + "... | \n", + "0.188155 | \n", + "0.000000 | \n", + "0.281110 | \n", + "0.281110 | \n", + "0.000000 | \n", + "0.241343 | \n", + "0.252641 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a42cde19925f464b3809d2 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a433036529d92b2c015de3 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.089666 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.089666 | \n", + "0.089666 | \n", + "0.000000 | \n", + "... | \n", + "0.056239 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.089666 | \n", + "0.089666 | \n", + "0.089666 | \n", + "
54a451926529d92b2c01eda8 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.478039 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a430876529d92b2c013e2b | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.075879 | \n", + "0.120980 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.097329 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a453df6529d92b2c020687 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.134496 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.115469 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
55b0e7116284773353bf4580 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a42bab6529d92b2c00ffa7 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a4748f19925f464b399ef2 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.097141 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.154879 | \n", + "0.154879 | \n", + "0.154879 | \n", + "
54a4356a19925f464b3875bb | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.098460 | \n", + "0.000000 | \n", + "0.147102 | \n", + "0.147102 | \n", + "0.126292 | \n", + "0.126292 | \n", + "0.132204 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a4697e6529d92b2c0279d3 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a45e426529d92b2c02488f | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a452c96529d92b2c01f889 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.252416 | \n", + "... | \n", + "0.158316 | \n", + "0.252416 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a4323619925f464b384bcc | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a4259119925f464b37af9c | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.165544 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a431da6529d92b2c014ee9 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.184132 | \n", + "0.184132 | \n", + "0.184132 | \n", + "
54a426fd19925f464b37c125 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a47bb019925f464b39b9b7 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.220619 | \n", + "0.235437 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a434d819925f464b386e62 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a428116529d92b2c00d1a7 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a436036529d92b2c01859e | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a47edf19925f464b39c58d | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a419706529d92b2c006650 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a4349619925f464b386b12 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.125739 | \n", + "0.134184 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.107952 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a4340f6529d92b2c016be8 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.188089 | \n", + "0.188089 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a40e546529d92b2c004606 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a428b419925f464b37d5ce | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.120245 | \n", + "0.128321 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.103235 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a453a519925f464b38fd16 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a41cb219925f464b376d82 | \n", + "0.129353 | \n", + "0.129353 | \n", + "0.129353 | \n", + "0.129353 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.081131 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a431896529d92b2c014b27 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.147772 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a423ab19925f464b3799f2 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.132349 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a47c1419925f464b39bb28 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.146874 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
593ee3ba12c27b182380821f | \n", + "0.108341 | \n", + "0.108341 | \n", + "0.108341 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.067952 | \n", + "0.000000 | \n", + "0.101522 | \n", + "0.101522 | \n", + "0.000000 | \n", + "0.087160 | \n", + "0.091240 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a456366529d92b2c02235a | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.120817 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a452d419925f464b38f1b5 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.111927 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a4659b6529d92b2c026a53 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a46d5d19925f464b3982d3 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.111053 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a4582119925f464b3927a1 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a4205319925f464b377c9f | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a470cc19925f464b39906b | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.168937 | \n", + "0.176845 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a44f4a6529d92b2c01de45 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.259213 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
592ef494ae10ad089795ebfa | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.098560 | \n", + "0.000000 | \n", + "0.147252 | \n", + "0.147252 | \n", + "0.000000 | \n", + "0.126421 | \n", + "0.132339 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a41f016529d92b2c00757d | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.124263 | \n", + "0.198122 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a436266529d92b2c01876e | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a428bd19925f464b37d63e | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a45a4f6529d92b2c0234d1 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a41ed76529d92b2c007440 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a42c906529d92b2c010b74 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.185129 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.148937 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
569519a6dc18ea6c22c9b9ab | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.407226 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
54a438d56529d92b2c019648 | \n", + "0.151165 | \n", + "0.151165 | \n", + "0.151165 | \n", + "0.000000 | \n", + "0.151165 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "... | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.121613 | \n", + "0.127306 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
50 rows × 283 columns
\n", + "\n", + " | dek | \n", + "hed | \n", + "aggregateRating | \n", + "ingredients | \n", + "prepSteps | \n", + "reviewsCount | \n", + "willMakeAgainPct | \n", + "ingredients_lemmafied | \n", + "cuisine_name | \n", + "photo_filename | \n", + "photo_credit | \n", + "author_name | \n", + "date_published | \n", + "recipe_url | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
id | \n", + "\n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " | \n", + " |
54a4270b19925f464b37c1dc | \n", + "\n", + " | Grilled Hearts of Romaine with Blue Cheese Vin... | \n", + "3.64 | \n", + "[1 1/2 cups white wine vinegar, 1/2 cup sugar,... | \n", + "[Combine first 5 ingredients and 1/4 teaspoon ... | \n", + "9 | \n", + "100 | \n", + "cup white wine vinegar brk cup sugar brk cup w... | \n", + "Missing Cuisine | \n", + "EP_12162015_placeholders_casual.jpg | \n", + "Photo by Chelsea Kyle, Prop Styling by Rhoda B... | \n", + "Kate Higgins | \n", + "2010-12-16 04:00:00+00:00 | \n", + "https://www.epicurious.com/recipes/food/views/... | \n", + "
54a42cde19925f464b3809d2 | \n", + "Green chiles pickled in soy sauce and vinegar ... | \n", + "Soy-Pickled Jalapeños | \n", + "3.43 | \n", + "[3 large fresh jalapeños (4 inches), sliced 1/... | \n", + "[Combine all ingredients in a small heavy sauc... | \n", + "6 | \n", + "100 | \n", + "large fresh jalapeño inch slice inch thick brk... | \n", + "Missing Cuisine | \n", + "EP_12162015_placeholders_rustic.jpg | \n", + "Photo by Chelsea Kyle, Prop Styling by Anna St... | \n", + "Lillian Chou | \n", + "2009-02-19 04:00:00+00:00 | \n", + "https://www.epicurious.com/recipes/food/views/... | \n", + "
54a433036529d92b2c015de3 | \n", + "This soup features the flavors of India: aroma... | \n", + "Curried Potato and Spinach Soup with Onion Sal... | \n", + "3.00 | \n", + "[4 cups chopped red onions (about 2 large), 1 ... | \n", + "[Combine first 5 ingredients in heavy medium s... | \n", + "6 | \n", + "67 | \n", + "cup chop red onion large brk tablespoon sunflo... | \n", + "Indian | \n", + "234125.jpg | \n", + "Brian Leatart | \n", + "Peter Gordon | \n", + "2006-03-07 04:00:00+00:00 | \n", + "https://www.epicurious.com/recipes/food/views/... | \n", + "
54a451926529d92b2c01eda8 | \n", + "\n", + " | Chicken Soup | \n", + "3.19 | \n", + "[1 pound chicken parts, 2 stalks celery, inclu... | \n", + "[1. Pour 12 cups of cold water into a large st... | \n", + "32 | \n", + "87 | \n", + "pound chicken part brk stalk celery include le... | \n", + "Kosher | \n", + "EP_12162015_placeholders_formal.jpg | \n", + "Photo by Chelsea Kyle, Prop Styling by Rhoda B... | \n", + "Sharon Lebewohl | \n", + "2004-08-20 04:00:00+00:00 | \n", + "https://www.epicurious.com/recipes/food/views/... | \n", + "
54a430876529d92b2c013e2b | \n", + "Brown sugar and molasses are balanced by fresh... | \n", + "Sweet-Hot Barbecue Sauce | \n", + "0.00 | \n", + "[2 tablespoons olive oil, 1 cup chopped onion,... | \n", + "[Heat oil in large saucepan over medium-high h... | \n", + "0 | \n", + "0 | \n", + "tablespoon olive oil brk cup chop onion brk cu... | \n", + "Missing Cuisine | \n", + "EP_12162015_placeholders_rustic.jpg | \n", + "Photo by Chelsea Kyle, Prop Styling by Anna St... | \n", + "Suzanne Tracht | \n", + "2007-12-03 20:11:11+00:00 | \n", + "https://www.epicurious.com/recipes/food/views/... | \n", + "
Repository initialized!\n", + "\n" + ], + "text/plain": [ + "Repository initialized!\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "#@markdown Enter the username of your DAGsHub account:\n", + "DAGSHUB_USER_NAME = \"AaronWChen\" #@param {type:\"string\"}\n", "\n", - "model_input = to_nlp_df['ingredients_lemmafied']\n", + "#@markdown Enter the email for your DAGsHub account:\n", + "DAGSHUB_EMAIL = \"awc33@cornell.edu\" #@param {type:\"string\"}\n", "\n", - "# Do fit transform on data\n", - "print(\"fit_transform start: \" + str(datetime.now()))\n", - "response = sklearn_transformer.fit_transform(tqdm(model_input)) \n", - "print(\"fit_transform end: \" + str(datetime.now()))\n", + "#@markdown Enter the repo name \n", + "DAGSHUB_REPO_NAME = \"MeaLeon\"\n", "\n", - "transformed_recipe = pd.DataFrame(\n", - " response.toarray(),\n", - " columns=sklearn_transformer.get_feature_names_out(),\n", - " index=model_input.index\n", - ")\n", - "\n", - "print(transformed_recipe.columns)" + "#@markdown Enter the name of the branch you are working on \n", + "BRANCH = \"NGRAM-1/try-llm-code-speedup\"\n", + "dagshub.init(repo_name=DAGSHUB_REPO_NAME\n", + " , repo_owner=DAGSHUB_USER_NAME)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Starting DEV stage for TFIDF Encoded model" ] }, { @@ -218,31 +2815,326 @@ "metadata": {}, "outputs": [], "source": [ - "transformed_recipe" + "mlflow.set_tracking_uri(f'https://dagshub.com/{DAGSHUB_USER_NAME}/MeaLeon.mlflow')\n", + "\n", + "# starter idea for making an experiment name can be the git branch, but need more specificity\n", + "experiment_name = f\"{DAGSHUB_EMAIL}/OHE_up_to_quadgrams\"\n", + "mlflow_exp_id = get_experiment_id(experiment_name)\n", + "\n", + "# define model location\n", + "# model_directory = \"/tmp/sklearn_model\"\n", + "model_directory = \"../models/sklearn_model\"\n", + "\n", + "# Define the required artifacts associated with the saved custom pyfunc\n", + "# sklearn_path = model_directory + \"\"\n", + "sklearn_model_path = model_directory + \"/python_model.pkl\"\n", + "sklearn_transformer_path = model_directory + \"/sklearn_transformer.pkl\"\n", + "transformed_recipes_path = model_directory + \"/transformed_recipes.pkl\"\n", + "\n", + "artifacts = {'sklearn_model': sklearn_model_path,\n", + " 'sklearn_transformer': sklearn_transformer_path,\n", + " 'transformed_recipes': transformed_recipes_path\n", + " }\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Preprocess start: 2024-03-20 07:38:00.336408\n", + "Preprocess end: 2024-03-21 01:19:33.081564\n", + "\n", + "\n", + "--------------\n", + "Preprocessed Dataframe:\n", + " dek \\\n", + "id \n", + "54a2b6b019925f464b373351 How does fried chicken achieve No. 1 status? B... \n", + "54a408a019925f464b3733bc Spinaci all'Ebraica \n", + "54a408a26529d92b2c003631 This majestic, moist, and richly spiced honey ... \n", + "54a408a66529d92b2c003638 The idea for this sandwich came to me when my ... \n", + "54a408a719925f464b3733cc In 1930, Simon Agranat, the chief justice of t... \n", + "\n", + " hed \\\n", + "id \n", + "54a2b6b019925f464b373351 Pickle-Brined Fried Chicken \n", + "54a408a019925f464b3733bc Spinach Jewish Style \n", + "54a408a26529d92b2c003631 New Year’s Honey Cake \n", + "54a408a66529d92b2c003638 The B.L.A.Bagel with Lox and Avocado \n", + "54a408a719925f464b3733cc Shakshuka a la Doktor Shakshuka \n", + "\n", + " aggregateRating \\\n", + "id \n", + "54a2b6b019925f464b373351 3.11 \n", + "54a408a019925f464b3733bc 3.22 \n", + "54a408a26529d92b2c003631 3.62 \n", + "54a408a66529d92b2c003638 4.00 \n", + "54a408a719925f464b3733cc 2.71 \n", + "\n", + " ingredients \\\n", + "id \n", + "54a2b6b019925f464b373351 [1 tablespoons yellow mustard seeds, 1 tablesp... \n", + "54a408a019925f464b3733bc [3 pounds small-leaved bulk spinach, Salt, 1/2... \n", + "54a408a26529d92b2c003631 [3 1/2 cups all-purpose flour, 1 tablespoon ba... \n", + "54a408a66529d92b2c003638 [1 small ripe avocado, preferably Hass (see No... \n", + "54a408a719925f464b3733cc [2 pounds fresh tomatoes, unpeeled and cut in ... \n", + "\n", + " prepSteps \\\n", + "id \n", + "54a2b6b019925f464b373351 [Toast mustard and coriander seeds in a dry me... \n", + "54a408a019925f464b3733bc [Remove the stems and roots from the spinach. ... \n", + "54a408a26529d92b2c003631 [I like this cake best baked in a 9-inch angel... \n", + "54a408a66529d92b2c003638 [A short time before serving, mash avocado and... \n", + "54a408a719925f464b3733cc [1. Place the tomatoes, garlic, salt, paprika,... \n", + "\n", + " reviewsCount willMakeAgainPct \\\n", + "id \n", + "54a2b6b019925f464b373351 7 100 \n", + "54a408a019925f464b3733bc 5 80 \n", + "54a408a26529d92b2c003631 105 88 \n", + "54a408a66529d92b2c003638 7 100 \n", + "54a408a719925f464b3733cc 7 83 \n", + "\n", + " ingredients_lemmafied \\\n", + "id \n", + "54a2b6b019925f464b373351 tablespoon yellow mustard seed brk tablespoon ... \n", + "54a408a019925f464b3733bc pound small leave bulk spinach brk salt brk cu... \n", + "54a408a26529d92b2c003631 cup purpose flour brk tablespoon baking powder... \n", + "54a408a66529d92b2c003638 small ripe avocado hass see note brk teaspoon ... \n", + "54a408a719925f464b3733cc pound fresh tomato unpeeled cut quarter ounce ... \n", + "\n", + " cuisine_name \\\n", + "id \n", + "54a2b6b019925f464b373351 Missing Cuisine \n", + "54a408a019925f464b3733bc Italian \n", + "54a408a26529d92b2c003631 Kosher \n", + "54a408a66529d92b2c003638 Kosher \n", + "54a408a719925f464b3733cc Kosher \n", + "\n", + " photo_filename \\\n", + "id \n", + "54a2b6b019925f464b373351 51247610_fried-chicken_1x1.jpg \n", + "54a408a019925f464b3733bc EP_12162015_placeholders_rustic.jpg \n", + "54a408a26529d92b2c003631 EP_09022015_honeycake-2.jpg \n", + "54a408a66529d92b2c003638 EP_12162015_placeholders_casual.jpg \n", + "54a408a719925f464b3733cc EP_12162015_placeholders_formal.jpg \n", + "\n", + " photo_credit \\\n", + "id \n", + "54a2b6b019925f464b373351 Michael Graydon and Nikole Herriott \n", + "54a408a019925f464b3733bc Photo by Chelsea Kyle, Prop Styling by Anna St... \n", + "54a408a26529d92b2c003631 Photo by Chelsea Kyle, Food Styling by Anna St... \n", + "54a408a66529d92b2c003638 Photo by Chelsea Kyle, Prop Styling by Rhoda B... \n", + "54a408a719925f464b3733cc Photo by Chelsea Kyle, Prop Styling by Rhoda B... \n", + "\n", + " author_name date_published \\\n", + "id \n", + "54a2b6b019925f464b373351 Missing Author Name 2014-08-19 04:00:00+00:00 \n", + "54a408a019925f464b3733bc Edda Servi Machlin 2008-09-09 04:00:00+00:00 \n", + "54a408a26529d92b2c003631 Marcy Goldman 2008-09-10 04:00:00+00:00 \n", + "54a408a66529d92b2c003638 Faye Levy 2008-09-08 04:00:00+00:00 \n", + "54a408a719925f464b3733cc Joan Nathan 2008-09-09 04:00:00+00:00 \n", + "\n", + " recipe_url \n", + "id \n", + "54a2b6b019925f464b373351 https://www.epicurious.com/recipes/food/views/... \n", + "54a408a019925f464b3733bc https://www.epicurious.com/recipes/food/views/... \n", + "54a408a26529d92b2c003631 https://www.epicurious.com/recipes/food/views/... \n", + "54a408a66529d92b2c003638 https://www.epicurious.com/recipes/food/views/... \n", + "54a408a719925f464b3733cc https://www.epicurious.com/recipes/food/views/... \n", + "(34756, 14)\n" + ] + } + ], + "source": [ + "# pre_proc_df is cleaned dataframe\n", + "print(\"Preprocess start: \" + str(datetime.now()))\n", + "whole_nlp_df = dfpp.preprocess_dataframe(raw_df)\n", + "print(\"Preprocess end: \" + str(datetime.now()))\n", + "print('\\n')\n", + "print('--------------')\n", + "print('Preprocessed Dataframe:', end='\\n')\n", + "print(whole_nlp_df.head())\n", + "print(whole_nlp_df.shape)" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "sklearn fit transform on ingredients:\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "Input Data: \n", + "id\n", + "54a2b6b019925f464b373351 tablespoon yellow mustard seed brk tablespoon ...\n", + "54a408a019925f464b3733bc pound small leave bulk spinach brk salt brk cu...\n", + "54a408a26529d92b2c003631 cup purpose flour brk tablespoon baking powder...\n", + "54a408a66529d92b2c003638 small ripe avocado hass see note brk teaspoon ...\n", + "54a408a719925f464b3733cc pound fresh tomato unpeeled cut quarter ounce ...\n", + " ... \n", + "59541a31bff3052847ae2107 tablespoon unsalt butter room temperature brk ...\n", + "5954233ad52ca90dc28200e7 tablespoon stick salt butter room temperature ...\n", + "595424c2109c972493636f83 tablespoon unsalted butter more greasing pan b...\n", + "5956638625dc3d1d829b7166 coarse salt brk lime wedge brk ounce tomato ju...\n", + "59566daa25dc3d1d829b7169 bottle millileter sour beer such almanac citra...\n", + "Name: ingredients_lemmafied, Length: 34756, dtype: object\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "Input Data Shape: \n", + "(34756,)\n", + "\n", + "\n", + "--------------------------------------------------------------------------------\n", + "Random 3 Records from Input Data: \n", + "id\n", + "54a40caa19925f464b374017 boneless muscovy duck breast half pound total ...\n", + "55d4e08063b1ba1b5534b198 tablespoon white wine vinegar brk teaspoon sug...\n", + "54a43ad16529d92b2c019fc3 cup basmati rice ounce brk cup sweeten flake c...\n", + "Name: ingredients_lemmafied, dtype: object\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 34756/34756 [00:03<00:00, 11131.57it/s]\n", + "/home/awchen/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/models/signature.py:213: UserWarning: Hint: Inferred schema contains integer column(s). Integer columns in Python cannot represent missing values. If your input data contains missing values at inference time, it will be encoded as floats and will cause a schema enforcement error. The best way to avoid this problem is to infer the model schema based on a realistic data sample (training dataset) that includes missing values. Alternatively, you can declare integer columns as doubles (float64) whenever these columns may have missing values. See `Handling Integers With Missing Values