From ab26b2e4a1dda08593a14f7afc36b7e5acc524b7 Mon Sep 17 00:00:00 2001
From: Aaron W Chen <awc33@cornell.edu>
Date: Tue, 9 Apr 2024 13:53:50 -0700
Subject: [PATCH] Try making TFIDF embeddings

Tested DVC and fixed commands

Tried making TFIDF embeddings, but no space on laptop
---
 nbs/15_create_tfidf_embeddings.ipynb | 1249 ++++++++++++++++++++++++++
 1 file changed, 1249 insertions(+)
 create mode 100644 nbs/15_create_tfidf_embeddings.ipynb

diff --git a/nbs/15_create_tfidf_embeddings.ipynb b/nbs/15_create_tfidf_embeddings.ipynb
new file mode 100644
index 0000000..9e207ff
--- /dev/null
+++ b/nbs/15_create_tfidf_embeddings.ipynb
@@ -0,0 +1,1249 @@
+{
+ "cells": [
+  {
+   "cell_type": "raw",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "description: Create TF-IDF embeddings from lemmatized recipe ingredients\n",
+    "output-file: create_tfidf_embeddings.html\n",
+    "title: Create TF-IDF Embeddings\n",
+    "\n",
+    "---\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# | default_exp core"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d7088c22cb4c4a2aa598d9fb700e8af0",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json:   0%|   …"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-03-27 20:57:40 INFO: Downloading default packages for language: en (English) ...\n",
+      "2024-03-27 20:57:41 INFO: File exists: /home/awchen/stanza_resources/en/default.zip\n",
+      "2024-03-27 20:57:44 INFO: Finished downloading models and saved to /home/awchen/stanza_resources.\n",
+      "2024-03-27 20:57:44 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f62ab630814a49449d6f0be2ac47e87e",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json:   0%|   …"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-03-27 20:57:45 INFO: Loading these models for language: en (English):\n",
+      "======================================\n",
+      "| Processor    | Package             |\n",
+      "--------------------------------------\n",
+      "| tokenize     | combined            |\n",
+      "| pos          | combined_charlm     |\n",
+      "| lemma        | combined_nocharlm   |\n",
+      "| constituency | ptb3-revised_charlm |\n",
+      "| depparse     | combined_charlm     |\n",
+      "| sentiment    | sstplus             |\n",
+      "| ner          | ontonotes_charlm    |\n",
+      "======================================\n",
+      "\n",
+      "2024-03-27 20:57:45 INFO: Using device: cpu\n",
+      "2024-03-27 20:57:45 INFO: Loading: tokenize\n",
+      "2024-03-27 20:57:45 INFO: Loading: pos\n",
+      "2024-03-27 20:57:45 INFO: Loading: lemma\n",
+      "2024-03-27 20:57:45 INFO: Loading: constituency\n",
+      "2024-03-27 20:57:45 INFO: Loading: depparse\n",
+      "2024-03-27 20:57:45 INFO: Loading: sentiment\n",
+      "2024-03-27 20:57:46 INFO: Loading: ner\n",
+      "2024-03-27 20:57:46 INFO: Done loading processors!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# | hide\n",
+    "# from bertopic import BERTopic\n",
+    "# from bertopic.vectorizers import OnlineCountVectorizer\n",
+    "import dagshub\n",
+    "from datetime import datetime\n",
+    "import dill as pickle\n",
+    "import dvc.api\n",
+    "# from hdbscan import HDBSCAN\n",
+    "from itertools import tee, islice, product\n",
+    "import joblib\n",
+    "import nbdev\n",
+    "from nbdev.showdoc import *\n",
+    "import pandas as pd\n",
+    "import re\n",
+    "from sentence_transformers import SentenceTransformer\n",
+    "from sklearn.feature_extraction.text import (\n",
+    "    CountVectorizer\n",
+    "    , TfidfTransformer\n",
+    "    , TfidfVectorizer\n",
+    "    , \n",
+    ")\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.pipeline import make_pipeline\n",
+    "from src.custom_sklearn_text_transformer_mlflow import CustomSKLearnAnalyzer\n",
+    "import src.dataframe_preprocessor as dfpp\n",
+    "import stanza\n",
+    "from tqdm import tqdm\n",
+    "# from umap import UMAP"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%env PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# | export"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | hide\n",
+    "# nbdev.nbdev_export()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Data Preparation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e03d339b07814cbc840d2f131a85f927",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json:   0%|   …"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-03-27 20:57:46 INFO: Downloading default packages for language: en (English) ...\n",
+      "2024-03-27 20:57:47 INFO: File exists: /home/awchen/stanza_resources/en/default.zip\n",
+      "2024-03-27 20:57:50 INFO: Finished downloading models and saved to /home/awchen/stanza_resources.\n",
+      "2024-03-27 20:57:50 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "c31d2969037c4ad1a79333ff34364488",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.6.0.json:   0%|   …"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-03-27 20:57:51 INFO: Loading these models for language: en (English):\n",
+      "======================================\n",
+      "| Processor    | Package             |\n",
+      "--------------------------------------\n",
+      "| tokenize     | combined            |\n",
+      "| pos          | combined_charlm     |\n",
+      "| lemma        | combined_nocharlm   |\n",
+      "| constituency | ptb3-revised_charlm |\n",
+      "| depparse     | combined_charlm     |\n",
+      "| sentiment    | sstplus             |\n",
+      "| ner          | ontonotes_charlm    |\n",
+      "======================================\n",
+      "\n",
+      "2024-03-27 20:57:51 INFO: Using device: cuda\n",
+      "2024-03-27 20:57:51 INFO: Loading: tokenize\n",
+      "2024-03-27 20:57:54 INFO: Loading: pos\n",
+      "2024-03-27 20:57:54 INFO: Loading: lemma\n",
+      "2024-03-27 20:57:54 INFO: Loading: constituency\n",
+      "2024-03-27 20:57:54 INFO: Loading: depparse\n",
+      "2024-03-27 20:57:54 INFO: Loading: sentiment\n",
+      "2024-03-27 20:57:55 INFO: Loading: ner\n",
+      "2024-03-27 20:57:55 INFO: Done loading processors!\n"
+     ]
+    }
+   ],
+   "source": [
+    "# download the English stanza resources, then build the pipeline\n",
+    "stanza.download('en')\n",
+    "nlp = stanza.Pipeline(\n",
+    "    'en',\n",
+    "    depparse_batch_size=50,\n",
+    "    depparse_min_length_to_batch_separately=50,\n",
+    "    verbose=True,\n",
+    "    use_gpu=True,  # set to true when on cloud/not on streaming computer\n",
+    "    batch_size=100)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the raw recipe JSON through DVC into a dataframe (no cleaning happens here)\n",
+    "from io import StringIO\n",
+    "data = dvc.api.read(\n",
+    "    path='../data/recipes-en-201706/epicurious-recipes_m2.json'\n",
+    "    , mode='r')\n",
+    "# passing a literal JSON string to read_json is deprecated in pandas >= 2.1; wrap it in a buffer\n",
+    "raw_df = pd.read_json(StringIO(data))\n",
+    "print('\\n\\n--------------\\nRaw Dataframe:')\n",
+    "print(raw_df.head())\n",
+    "print(raw_df.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# take a reproducible 100-row sample of raw_df and split it 50/50 into train/test\n",
+    "subset_df = raw_df.sample(n=100, random_state=45)\n",
+    "train_df, test_df = train_test_split(\n",
+    "    subset_df, test_size=0.5, random_state=45)\n",
+    "\n",
+    "# to_nlp_df is train_df after dfpp.preprocess_dataframe\n",
+    "to_nlp_df = dfpp.preprocess_dataframe(train_df)\n",
+    "\n",
+    "print('\\n\\n--------------\\nPreprocessed Dataframe:')\n",
+    "print(to_nlp_df.head())\n",
+    "print(to_nlp_df.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# keyword arguments for the sklearn vectorizer (CountVectorizer or TfidfVectorizer)\n",
+    "sklearn_transformer_params = dict(\n",
+    "    # analyzer produced by CustomSKLearnAnalyzer.ngram_maker with n-gram lengths 1 to 4\n",
+    "    analyzer=CustomSKLearnAnalyzer().ngram_maker(\n",
+    "        min_ngram_length=1, max_ngram_length=4),\n",
+    "    min_df=3,\n",
+    "    # binary=False\n",
+    ")\n",
+    "\n",
+    "sklearn_transformer = TfidfVectorizer(**sklearn_transformer_params)\n",
+    "\n",
+    "model_input = to_nlp_df['ingredients_lemmafied']\n",
+    "\n",
+    "# fit on the sample and transform it, printing a timestamp before and after\n",
+    "print(f\"fit_transform start: {datetime.now()}\")\n",
+    "response = sklearn_transformer.fit_transform(tqdm(model_input))\n",
+    "print(f\"fit_transform end: {datetime.now()}\")\n",
+    "\n",
+    "# dense frame: rows keep model_input's index, columns are the fitted feature names\n",
+    "transformed_recipe = pd.DataFrame(\n",
+    "    data=response.toarray(),\n",
+    "    index=model_input.index,\n",
+    "    columns=sklearn_transformer.get_feature_names_out(),\n",
+    ")\n",
+    "\n",
+    "print(transformed_recipe.columns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformed_recipe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "transformed_recipe.columns.tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "to_nlp_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Prepare whole dataframe for new processing\n",
+    "import mlflow\n",
+    "from mlflow.models import infer_signature\n",
+    "from src.custom_stanza_mlflow import CustomSKLearnWrapper"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_experiment_id(name):\n",
+    "    \"\"\"Return the MLflow experiment ID for `name`, creating the experiment if it is missing.\"\"\"\n",
+    "    existing = mlflow.get_experiment_by_name(name)\n",
+    "    if existing is not None:\n",
+    "        return existing.experiment_id\n",
+    "    # no experiment with this name yet: create it and hand back the new ID\n",
+    "    return mlflow.create_experiment(name)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Repository initialized!\n",
+       "</pre>\n"
+      ],
+      "text/plain": [
+       "Repository initialized!\n"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "#@markdown Enter the username of your DAGsHub account:\n",
+    "DAGSHUB_USER_NAME = \"AaronWChen\" #@param {type:\"string\"}\n",
+    "\n",
+    "#@markdown Enter the email for your DAGsHub account:\n",
+    "DAGSHUB_EMAIL = \"awc33@cornell.edu\" #@param {type:\"string\"}\n",
+    "\n",
+    "#@markdown Enter the repo name \n",
+    "DAGSHUB_REPO_NAME = \"MeaLeon\"\n",
+    "\n",
+    "#@markdown Enter the name of the branch you are working on \n",
+    "BRANCH = \"NGRAM-1/try-llm-code-speedup\"\n",
+    "dagshub.init(repo_name=DAGSHUB_REPO_NAME\n",
+    "             , repo_owner=DAGSHUB_USER_NAME)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Starting DEV stage for TFIDF Encoded model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mlflow.set_tracking_uri(f'https://dagshub.com/{DAGSHUB_USER_NAME}/{DAGSHUB_REPO_NAME}.mlflow')\n",
+    "\n",
+    "# starter idea for making an experiment name can be the git branch, but need more specificity\n",
+    "# this notebook fits a TfidfVectorizer, so log under a TFIDF experiment rather than the OHE one\n",
+    "experiment_name = f\"{DAGSHUB_EMAIL}/TFIDF_up_to_quadgrams\"\n",
+    "mlflow_exp_id = get_experiment_id(experiment_name)\n",
+    "\n",
+    "# define model location\n",
+    "model_directory = \"../models/sklearn_model\"\n",
+    "\n",
+    "# Define the required artifacts associated with the saved custom pyfunc\n",
+    "sklearn_model_path = model_directory + \"/python_model.pkl\"\n",
+    "sklearn_transformer_path = model_directory + \"/sklearn_transformer.pkl\"\n",
+    "transformed_recipes_path = model_directory + \"/transformed_recipes.pkl\"\n",
+    "# path is defined but its entry in `artifacts` below is commented out\n",
+    "combined_df_path = model_directory + \"/combined_df.pkl\"\n",
+    "\n",
+    "artifacts = {'sklearn_model': sklearn_model_path,\n",
+    "             'sklearn_transformer': sklearn_transformer_path,\n",
+    "             'transformed_recipes': transformed_recipes_path,\n",
+    "            #  'combined_data': combined_df_path\n",
+    "             }\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# whole_nlp_df is the full raw_df after dfpp.preprocess_dataframe; timestamps bracket the run\n",
+    "print(f\"Preprocess start: {datetime.now()}\")\n",
+    "whole_nlp_df = dfpp.preprocess_dataframe(raw_df)\n",
+    "print(f\"Preprocess end: {datetime.now()}\")\n",
+    "\n",
+    "# banner, then a preview and the shape of the preprocessed frame\n",
+    "print('\\n\\n--------------\\nPreprocessed Dataframe: ')\n",
+    "print(whole_nlp_df.head())\n",
+    "print(whole_nlp_df.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "whole_nlp_df.to_parquet('../joblib/2024.03.19/pre_proc_df.parquet.gzip', \n",
+    "                        compression='gzip',\n",
+    "                        index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>dek</th>\n",
+       "      <th>hed</th>\n",
+       "      <th>aggregateRating</th>\n",
+       "      <th>ingredients</th>\n",
+       "      <th>prepSteps</th>\n",
+       "      <th>reviewsCount</th>\n",
+       "      <th>willMakeAgainPct</th>\n",
+       "      <th>ingredients_lemmafied</th>\n",
+       "      <th>cuisine_name</th>\n",
+       "      <th>photo_filename</th>\n",
+       "      <th>photo_credit</th>\n",
+       "      <th>author_name</th>\n",
+       "      <th>date_published</th>\n",
+       "      <th>recipe_url</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>54a2b6b019925f464b373351</th>\n",
+       "      <td>How does fried chicken achieve No. 1 status? B...</td>\n",
+       "      <td>Pickle-Brined Fried Chicken</td>\n",
+       "      <td>3.11</td>\n",
+       "      <td>[1 tablespoons yellow mustard seeds, 1 tablesp...</td>\n",
+       "      <td>[Toast mustard and coriander seeds in a dry me...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>100</td>\n",
+       "      <td>tablespoon yellow mustard seed brk tablespoon ...</td>\n",
+       "      <td>Missing Cuisine</td>\n",
+       "      <td>51247610_fried-chicken_1x1.jpg</td>\n",
+       "      <td>Michael Graydon and Nikole Herriott</td>\n",
+       "      <td>Missing Author Name</td>\n",
+       "      <td>2014-08-19 04:00:00+00:00</td>\n",
+       "      <td>https://www.epicurious.com/recipes/food/views/...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>54a408a019925f464b3733bc</th>\n",
+       "      <td>Spinaci all'Ebraica</td>\n",
+       "      <td>Spinach Jewish Style</td>\n",
+       "      <td>3.22</td>\n",
+       "      <td>[3 pounds small-leaved bulk spinach, Salt, 1/2...</td>\n",
+       "      <td>[Remove the stems and roots from the spinach. ...</td>\n",
+       "      <td>5</td>\n",
+       "      <td>80</td>\n",
+       "      <td>pound small leave bulk spinach brk salt brk cu...</td>\n",
+       "      <td>Italian</td>\n",
+       "      <td>EP_12162015_placeholders_rustic.jpg</td>\n",
+       "      <td>Photo by Chelsea Kyle, Prop Styling by Anna St...</td>\n",
+       "      <td>Edda Servi Machlin</td>\n",
+       "      <td>2008-09-09 04:00:00+00:00</td>\n",
+       "      <td>https://www.epicurious.com/recipes/food/views/...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>54a408a26529d92b2c003631</th>\n",
+       "      <td>This majestic, moist, and richly spiced honey ...</td>\n",
+       "      <td>New Year’s Honey Cake</td>\n",
+       "      <td>3.62</td>\n",
+       "      <td>[3 1/2 cups all-purpose flour, 1 tablespoon ba...</td>\n",
+       "      <td>[I like this cake best baked in a 9-inch angel...</td>\n",
+       "      <td>105</td>\n",
+       "      <td>88</td>\n",
+       "      <td>cup purpose flour brk tablespoon baking powder...</td>\n",
+       "      <td>Kosher</td>\n",
+       "      <td>EP_09022015_honeycake-2.jpg</td>\n",
+       "      <td>Photo by Chelsea Kyle, Food Styling by Anna St...</td>\n",
+       "      <td>Marcy Goldman</td>\n",
+       "      <td>2008-09-10 04:00:00+00:00</td>\n",
+       "      <td>https://www.epicurious.com/recipes/food/views/...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>54a408a66529d92b2c003638</th>\n",
+       "      <td>The idea for this sandwich came to me when my ...</td>\n",
+       "      <td>The B.L.A.—Bagel with Lox and Avocado</td>\n",
+       "      <td>4.00</td>\n",
+       "      <td>[1 small ripe avocado, preferably Hass (see No...</td>\n",
+       "      <td>[A short time before serving, mash avocado and...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>100</td>\n",
+       "      <td>small ripe avocado hass see note brk teaspoon ...</td>\n",
+       "      <td>Kosher</td>\n",
+       "      <td>EP_12162015_placeholders_casual.jpg</td>\n",
+       "      <td>Photo by Chelsea Kyle, Prop Styling by Rhoda B...</td>\n",
+       "      <td>Faye Levy</td>\n",
+       "      <td>2008-09-08 04:00:00+00:00</td>\n",
+       "      <td>https://www.epicurious.com/recipes/food/views/...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>54a408a719925f464b3733cc</th>\n",
+       "      <td>In 1930, Simon Agranat, the chief justice of t...</td>\n",
+       "      <td>Shakshuka a la Doktor Shakshuka</td>\n",
+       "      <td>2.71</td>\n",
+       "      <td>[2 pounds fresh tomatoes, unpeeled and cut in ...</td>\n",
+       "      <td>[1. Place the tomatoes, garlic, salt, paprika,...</td>\n",
+       "      <td>7</td>\n",
+       "      <td>83</td>\n",
+       "      <td>pound fresh tomato unpeeled cut quarter ounce ...</td>\n",
+       "      <td>Kosher</td>\n",
+       "      <td>EP_12162015_placeholders_formal.jpg</td>\n",
+       "      <td>Photo by Chelsea Kyle, Prop Styling by Rhoda B...</td>\n",
+       "      <td>Joan Nathan</td>\n",
+       "      <td>2008-09-09 04:00:00+00:00</td>\n",
+       "      <td>https://www.epicurious.com/recipes/food/views/...</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                                                                        dek  \\\n",
+       "id                                                                            \n",
+       "54a2b6b019925f464b373351  How does fried chicken achieve No. 1 status? B...   \n",
+       "54a408a019925f464b3733bc                                Spinaci all'Ebraica   \n",
+       "54a408a26529d92b2c003631  This majestic, moist, and richly spiced honey ...   \n",
+       "54a408a66529d92b2c003638  The idea for this sandwich came to me when my ...   \n",
+       "54a408a719925f464b3733cc  In 1930, Simon Agranat, the chief justice of t...   \n",
+       "\n",
+       "                                                            hed  \\\n",
+       "id                                                                \n",
+       "54a2b6b019925f464b373351            Pickle-Brined Fried Chicken   \n",
+       "54a408a019925f464b3733bc                   Spinach Jewish Style   \n",
+       "54a408a26529d92b2c003631                  New Year’s Honey Cake   \n",
+       "54a408a66529d92b2c003638  The B.L.A.—Bagel with Lox and Avocado   \n",
+       "54a408a719925f464b3733cc        Shakshuka a la Doktor Shakshuka   \n",
+       "\n",
+       "                          aggregateRating  \\\n",
+       "id                                          \n",
+       "54a2b6b019925f464b373351             3.11   \n",
+       "54a408a019925f464b3733bc             3.22   \n",
+       "54a408a26529d92b2c003631             3.62   \n",
+       "54a408a66529d92b2c003638             4.00   \n",
+       "54a408a719925f464b3733cc             2.71   \n",
+       "\n",
+       "                                                                ingredients  \\\n",
+       "id                                                                            \n",
+       "54a2b6b019925f464b373351  [1 tablespoons yellow mustard seeds, 1 tablesp...   \n",
+       "54a408a019925f464b3733bc  [3 pounds small-leaved bulk spinach, Salt, 1/2...   \n",
+       "54a408a26529d92b2c003631  [3 1/2 cups all-purpose flour, 1 tablespoon ba...   \n",
+       "54a408a66529d92b2c003638  [1 small ripe avocado, preferably Hass (see No...   \n",
+       "54a408a719925f464b3733cc  [2 pounds fresh tomatoes, unpeeled and cut in ...   \n",
+       "\n",
+       "                                                                  prepSteps  \\\n",
+       "id                                                                            \n",
+       "54a2b6b019925f464b373351  [Toast mustard and coriander seeds in a dry me...   \n",
+       "54a408a019925f464b3733bc  [Remove the stems and roots from the spinach. ...   \n",
+       "54a408a26529d92b2c003631  [I like this cake best baked in a 9-inch angel...   \n",
+       "54a408a66529d92b2c003638  [A short time before serving, mash avocado and...   \n",
+       "54a408a719925f464b3733cc  [1. Place the tomatoes, garlic, salt, paprika,...   \n",
+       "\n",
+       "                          reviewsCount  willMakeAgainPct  \\\n",
+       "id                                                         \n",
+       "54a2b6b019925f464b373351             7               100   \n",
+       "54a408a019925f464b3733bc             5                80   \n",
+       "54a408a26529d92b2c003631           105                88   \n",
+       "54a408a66529d92b2c003638             7               100   \n",
+       "54a408a719925f464b3733cc             7                83   \n",
+       "\n",
+       "                                                      ingredients_lemmafied  \\\n",
+       "id                                                                            \n",
+       "54a2b6b019925f464b373351  tablespoon yellow mustard seed brk tablespoon ...   \n",
+       "54a408a019925f464b3733bc  pound small leave bulk spinach brk salt brk cu...   \n",
+       "54a408a26529d92b2c003631  cup purpose flour brk tablespoon baking powder...   \n",
+       "54a408a66529d92b2c003638  small ripe avocado hass see note brk teaspoon ...   \n",
+       "54a408a719925f464b3733cc  pound fresh tomato unpeeled cut quarter ounce ...   \n",
+       "\n",
+       "                             cuisine_name  \\\n",
+       "id                                          \n",
+       "54a2b6b019925f464b373351  Missing Cuisine   \n",
+       "54a408a019925f464b3733bc          Italian   \n",
+       "54a408a26529d92b2c003631           Kosher   \n",
+       "54a408a66529d92b2c003638           Kosher   \n",
+       "54a408a719925f464b3733cc           Kosher   \n",
+       "\n",
+       "                                               photo_filename  \\\n",
+       "id                                                              \n",
+       "54a2b6b019925f464b373351       51247610_fried-chicken_1x1.jpg   \n",
+       "54a408a019925f464b3733bc  EP_12162015_placeholders_rustic.jpg   \n",
+       "54a408a26529d92b2c003631          EP_09022015_honeycake-2.jpg   \n",
+       "54a408a66529d92b2c003638  EP_12162015_placeholders_casual.jpg   \n",
+       "54a408a719925f464b3733cc  EP_12162015_placeholders_formal.jpg   \n",
+       "\n",
+       "                                                               photo_credit  \\\n",
+       "id                                                                            \n",
+       "54a2b6b019925f464b373351                Michael Graydon and Nikole Herriott   \n",
+       "54a408a019925f464b3733bc  Photo by Chelsea Kyle, Prop Styling by Anna St...   \n",
+       "54a408a26529d92b2c003631  Photo by Chelsea Kyle, Food Styling by Anna St...   \n",
+       "54a408a66529d92b2c003638  Photo by Chelsea Kyle, Prop Styling by Rhoda B...   \n",
+       "54a408a719925f464b3733cc  Photo by Chelsea Kyle, Prop Styling by Rhoda B...   \n",
+       "\n",
+       "                                  author_name            date_published  \\\n",
+       "id                                                                        \n",
+       "54a2b6b019925f464b373351  Missing Author Name 2014-08-19 04:00:00+00:00   \n",
+       "54a408a019925f464b3733bc   Edda Servi Machlin 2008-09-09 04:00:00+00:00   \n",
+       "54a408a26529d92b2c003631        Marcy Goldman 2008-09-10 04:00:00+00:00   \n",
+       "54a408a66529d92b2c003638            Faye Levy 2008-09-08 04:00:00+00:00   \n",
+       "54a408a719925f464b3733cc          Joan Nathan 2008-09-09 04:00:00+00:00   \n",
+       "\n",
+       "                                                                 recipe_url  \n",
+       "id                                                                           \n",
+       "54a2b6b019925f464b373351  https://www.epicurious.com/recipes/food/views/...  \n",
+       "54a408a019925f464b3733bc  https://www.epicurious.com/recipes/food/views/...  \n",
+       "54a408a26529d92b2c003631  https://www.epicurious.com/recipes/food/views/...  \n",
+       "54a408a66529d92b2c003638  https://www.epicurious.com/recipes/food/views/...  \n",
+       "54a408a719925f464b3733cc  https://www.epicurious.com/recipes/food/views/...  "
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "whole_nlp_df = pd.read_parquet('../joblib/2024.03.19/pre_proc_df.parquet.gzip')\n",
+    "whole_nlp_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "sklearn fit transform on ingredients:\n",
+      "\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "Input Data: \n",
+      "id\n",
+      "54a2b6b019925f464b373351    tablespoon yellow mustard seed brk tablespoon ...\n",
+      "54a408a019925f464b3733bc    pound small leave bulk spinach brk salt brk cu...\n",
+      "54a408a26529d92b2c003631    cup purpose flour brk tablespoon baking powder...\n",
+      "54a408a66529d92b2c003638    small ripe avocado hass see note brk teaspoon ...\n",
+      "54a408a719925f464b3733cc    pound fresh tomato unpeeled cut quarter ounce ...\n",
+      "                                                  ...                        \n",
+      "59541a31bff3052847ae2107    tablespoon unsalt butter room temperature brk ...\n",
+      "5954233ad52ca90dc28200e7    tablespoon stick salt butter room temperature ...\n",
+      "595424c2109c972493636f83    tablespoon unsalted butter more greasing pan b...\n",
+      "5956638625dc3d1d829b7166    coarse salt brk lime wedge brk ounce tomato ju...\n",
+      "59566daa25dc3d1d829b7169    bottle millileter sour beer such almanac citra...\n",
+      "Name: ingredients_lemmafied, Length: 34756, dtype: object\n",
+      "\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "Input Data Shape: \n",
+      "(34756,)\n",
+      "\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "Random 3 Records from Input Data: \n",
+      "id\n",
+      "54a40caa19925f464b374017    boneless muscovy duck breast half pound total ...\n",
+      "55d4e08063b1ba1b5534b198    tablespoon white wine vinegar brk teaspoon sug...\n",
+      "54a43ad16529d92b2c019fc3    cup basmati rice ounce brk cup sweeten flake c...\n",
+      "Name: ingredients_lemmafied, dtype: object\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|██████████| 34756/34756 [00:02<00:00, 11734.80it/s]\n",
+      "/home/awchen/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/models/signature.py:213: UserWarning: Hint: Inferred schema contains integer column(s). Integer columns in Python cannot represent missing values. If your input data contains missing values at inference time, it will be encoded as floats and will cause a schema enforcement error. The best way to avoid this problem is to infer the model schema based on a realistic data sample (training dataset) that includes missing values. Alternatively, you can declare integer columns as doubles (float64) whenever these columns may have missing values. See `Handling Integers With Missing Values <https://www.mlflow.org/docs/latest/models.html#handling-integers-with-missing-values>`_ for more details.\n",
+      "  outputs = _infer_schema(model_output) if model_output is not None else None\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "Transformed Data:\n",
+      "                          100g  125g  13x9x2  150g  1pound  1tablespoon  \\\n",
+      "id                                                                        \n",
+      "54a2b6b019925f464b373351     0     0       0     0       0            0   \n",
+      "54a408a019925f464b3733bc     0     0       0     0       0            0   \n",
+      "54a408a26529d92b2c003631     0     0       0     0       0            0   \n",
+      "54a408a66529d92b2c003638     0     0       0     0       0            0   \n",
+      "54a408a719925f464b3733cc     0     0       0     0       0            0   \n",
+      "\n",
+      "                          1teaspoon  200g  250g  2cup  ...  árbol divide  \\\n",
+      "id                                                     ...                 \n",
+      "54a2b6b019925f464b373351          0     0     0     0  ...             0   \n",
+      "54a408a019925f464b3733bc          0     0     0     0  ...             0   \n",
+      "54a408a26529d92b2c003631          0     0     0     0  ...             0   \n",
+      "54a408a66529d92b2c003638          0     0     0     0  ...             0   \n",
+      "54a408a719925f464b3733cc          0     0     0     0  ...             0   \n",
+      "\n",
+      "                          árbol seed  árbol seed remove  árbol stem  \\\n",
+      "id                                                                    \n",
+      "54a2b6b019925f464b373351           0                  0           0   \n",
+      "54a408a019925f464b3733bc           0                  0           0   \n",
+      "54a408a26529d92b2c003631           0                  0           0   \n",
+      "54a408a66529d92b2c003638           0                  0           0   \n",
+      "54a408a719925f464b3733cc           0                  0           0   \n",
+      "\n",
+      "                          árbol teaspoon  árbol teaspoon crush  \\\n",
+      "id                                                               \n",
+      "54a2b6b019925f464b373351               0                     0   \n",
+      "54a408a019925f464b3733bc               0                     0   \n",
+      "54a408a26529d92b2c003631               0                     0   \n",
+      "54a408a66529d92b2c003638               0                     0   \n",
+      "54a408a719925f464b3733cc               0                     0   \n",
+      "\n",
+      "                          árbol teaspoon crush red  árbol wipe  \\\n",
+      "id                                                               \n",
+      "54a2b6b019925f464b373351                         0           0   \n",
+      "54a408a019925f464b3733bc                         0           0   \n",
+      "54a408a26529d92b2c003631                         0           0   \n",
+      "54a408a66529d92b2c003638                         0           0   \n",
+      "54a408a719925f464b3733cc                         0           0   \n",
+      "\n",
+      "                          árbol wipe clean  épice  \n",
+      "id                                                 \n",
+      "54a2b6b019925f464b373351                 0      0  \n",
+      "54a408a019925f464b3733bc                 0      0  \n",
+      "54a408a26529d92b2c003631                 0      0  \n",
+      "54a408a66529d92b2c003638                 0      0  \n",
+      "54a408a719925f464b3733cc                 0      0  \n",
+      "\n",
+      "[5 rows x 78378 columns]\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "75ae1f83e714420fafae1ba91d492f9a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f9218a470aba4aa38a890cd5b940cf10",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1912ef35a25a419e8d653ce4d5d7f322",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024/03/27 20:59:59 WARNING mlflow.utils.environment: Encountered an unexpected error while inferring pip requirements (model URI: /tmp/tmpahyshrio/model, flavor: python_function), fall back to return ['cloudpickle==2.2.1']. Set logging level to DEBUG to see the full traceback.\n",
+      "/home/awchen/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/_distutils_hack/__init__.py:33: UserWarning: Setuptools is replacing distutils.\n",
+      "  warnings.warn(\"Setuptools is replacing distutils.\")\n"
+     ]
+    },
+    {
+     "ename": "MlflowException",
+     "evalue": "API request to https://dagshub.com/AaronWChen/MeaLeon.mlflow/api/2.0/mlflow-artifacts/artifacts/ad83ec0a104a44b5a16da48605603245/2e72322335494f84af9ff7e7e44c3ff9/artifacts/sklearn_model/artifacts/transformed_recipes.pkl failed with exception HTTPSConnectionPool(host='dagshub.com', port=443): Max retries exceeded with url: /AaronWChen/MeaLeon.mlflow/api/2.0/mlflow-artifacts/artifacts/ad83ec0a104a44b5a16da48605603245/2e72322335494f84af9ff7e7e44c3ff9/artifacts/sklearn_model/artifacts/transformed_recipes.pkl (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2396)')))",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mSSLEOFError\u001b[0m                               Traceback (most recent call last)",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:715\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m    714\u001b[0m \u001b[38;5;66;03m# Make the request on the httplib connection object.\u001b[39;00m\n\u001b[0;32m--> 715\u001b[0m httplib_response \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    716\u001b[0m \u001b[43m    \u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    717\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    718\u001b[0m \u001b[43m    \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    719\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout_obj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    720\u001b[0m \u001b[43m    \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    721\u001b[0m \u001b[43m    \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    722\u001b[0m \u001b[43m    \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    723\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    725\u001b[0m \u001b[38;5;66;03m# If we're going to release the connection in ``finally:``, then\u001b[39;00m\n\u001b[1;32m    726\u001b[0m \u001b[38;5;66;03m# the response doesn't need to know about the connection. 
Otherwise\u001b[39;00m\n\u001b[1;32m    727\u001b[0m \u001b[38;5;66;03m# it will also try to release it and we'll have a double-release\u001b[39;00m\n\u001b[1;32m    728\u001b[0m \u001b[38;5;66;03m# mess.\u001b[39;00m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:416\u001b[0m, in \u001b[0;36mHTTPConnectionPool._make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m    415\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 416\u001b[0m         \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mhttplib_request_kw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    418\u001b[0m \u001b[38;5;66;03m# We are swallowing BrokenPipeError (errno.EPIPE) since the server is\u001b[39;00m\n\u001b[1;32m    419\u001b[0m \u001b[38;5;66;03m# legitimately able to close the connection after sending a valid response.\u001b[39;00m\n\u001b[1;32m    420\u001b[0m \u001b[38;5;66;03m# With this behaviour, the received response is still readable.\u001b[39;00m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/urllib3/connection.py:244\u001b[0m, in \u001b[0;36mHTTPConnection.request\u001b[0;34m(self, method, url, body, headers)\u001b[0m\n\u001b[1;32m    243\u001b[0m     headers[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUser-Agent\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m _get_default_user_agent()\n\u001b[0;32m--> 244\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mHTTPConnection\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.asdf/installs/python/3.10.10/lib/python3.10/http/client.py:1282\u001b[0m, in \u001b[0;36mHTTPConnection.request\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m   1281\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Send a complete request to the server.\"\"\"\u001b[39;00m\n\u001b[0;32m-> 1282\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_request\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mencode_chunked\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.asdf/installs/python/3.10.10/lib/python3.10/http/client.py:1328\u001b[0m, in \u001b[0;36mHTTPConnection._send_request\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m   1327\u001b[0m     body \u001b[38;5;241m=\u001b[39m _encode(body, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbody\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m-> 1328\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mendheaders\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mencode_chunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencode_chunked\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.asdf/installs/python/3.10.10/lib/python3.10/http/client.py:1277\u001b[0m, in \u001b[0;36mHTTPConnection.endheaders\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m   1276\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m CannotSendHeader()\n\u001b[0;32m-> 1277\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_send_output\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmessage_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mencode_chunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mencode_chunked\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/.asdf/installs/python/3.10.10/lib/python3.10/http/client.py:1076\u001b[0m, in \u001b[0;36mHTTPConnection._send_output\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m   1074\u001b[0m         chunk \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(chunk)\u001b[38;5;132;01m:\u001b[39;00m\u001b[38;5;124mX\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\r\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mencode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mascii\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;241m+\u001b[39m chunk \\\n\u001b[1;32m   1075\u001b[0m             \u001b[38;5;241m+\u001b[39m \u001b[38;5;124mb\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\r\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m-> 1076\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mchunk\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1078\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m encode_chunked \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_http_vsn \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m11\u001b[39m:\n\u001b[1;32m   1079\u001b[0m     \u001b[38;5;66;03m# end chunked transfer\u001b[39;00m\n",
+      "File \u001b[0;32m~/.asdf/installs/python/3.10.10/lib/python3.10/http/client.py:998\u001b[0m, in \u001b[0;36mHTTPConnection.send\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m    997\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 998\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msock\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msendall\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    999\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n",
+      "File \u001b[0;32m~/.asdf/installs/python/3.10.10/lib/python3.10/ssl.py:1237\u001b[0m, in \u001b[0;36mSSLSocket.sendall\u001b[0;34m(self, data, flags)\u001b[0m\n\u001b[1;32m   1236\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m count \u001b[38;5;241m<\u001b[39m amount:\n\u001b[0;32m-> 1237\u001b[0m     v \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbyte_view\u001b[49m\u001b[43m[\u001b[49m\u001b[43mcount\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1238\u001b[0m     count \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m v\n",
+      "File \u001b[0;32m~/.asdf/installs/python/3.10.10/lib/python3.10/ssl.py:1206\u001b[0m, in \u001b[0;36mSSLSocket.send\u001b[0;34m(self, data, flags)\u001b[0m\n\u001b[1;32m   1203\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m   1204\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon-zero flags not allowed in calls to send() on \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m\n\u001b[1;32m   1205\u001b[0m             \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m)\n\u001b[0;32m-> 1206\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sslobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrite\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1207\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
+      "\u001b[0;31mSSLEOFError\u001b[0m: EOF occurred in violation of protocol (_ssl.c:2396)",
+      "\nDuring handling of the above exception, another exception occurred:\n",
+      "\u001b[0;31mMaxRetryError\u001b[0m                             Traceback (most recent call last)",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/requests/adapters.py:486\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m    485\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 486\u001b[0m     resp \u001b[38;5;241m=\u001b[39m \u001b[43mconn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    487\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    488\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    489\u001b[0m \u001b[43m        \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    490\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    491\u001b[0m \u001b[43m        \u001b[49m\u001b[43mredirect\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    492\u001b[0m \u001b[43m        \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    493\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpreload_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    494\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mdecode_content\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    495\u001b[0m \u001b[43m        \u001b[49m\u001b[43mretries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    496\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    497\u001b[0m \u001b[43m        \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    498\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    500\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ProtocolError, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;28;01mas\u001b[39;00m err:\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:827\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m    824\u001b[0m     log\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m    825\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRetrying (\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m) after connection broken by \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, retries, err, url\n\u001b[1;32m    826\u001b[0m     )\n\u001b[0;32m--> 827\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    828\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    829\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    830\u001b[0m \u001b[43m        \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    831\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    832\u001b[0m \u001b[43m        \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    833\u001b[0m \u001b[43m        \u001b[49m\u001b[43mredirect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    834\u001b[0m \u001b[43m        \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    835\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    836\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mpool_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpool_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    837\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrelease_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrelease_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    838\u001b[0m \u001b[43m        \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    839\u001b[0m \u001b[43m        \u001b[49m\u001b[43mbody_pos\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody_pos\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    840\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\n\u001b[1;32m    841\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    843\u001b[0m \u001b[38;5;66;03m# Handle redirect?\u001b[39;00m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:827\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m    824\u001b[0m     log\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m    825\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRetrying (\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m) after connection broken by \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, retries, err, url\n\u001b[1;32m    826\u001b[0m     )\n\u001b[0;32m--> 827\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    828\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    829\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    830\u001b[0m \u001b[43m        \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    831\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    832\u001b[0m \u001b[43m        \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    833\u001b[0m \u001b[43m        \u001b[49m\u001b[43mredirect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    834\u001b[0m \u001b[43m        \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    835\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    836\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mpool_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpool_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    837\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrelease_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrelease_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    838\u001b[0m \u001b[43m        \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    839\u001b[0m \u001b[43m        \u001b[49m\u001b[43mbody_pos\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody_pos\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    840\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\n\u001b[1;32m    841\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    843\u001b[0m \u001b[38;5;66;03m# Handle redirect?\u001b[39;00m\n",
+      "    \u001b[0;31m[... skipping similar frames: HTTPConnectionPool.urlopen at line 827 (2 times)]\u001b[0m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:827\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m    824\u001b[0m     log\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m    825\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRetrying (\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m) after connection broken by \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, retries, err, url\n\u001b[1;32m    826\u001b[0m     )\n\u001b[0;32m--> 827\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43murlopen\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    828\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    829\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    830\u001b[0m \u001b[43m        \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    831\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    832\u001b[0m \u001b[43m        \u001b[49m\u001b[43mretries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    833\u001b[0m \u001b[43m        \u001b[49m\u001b[43mredirect\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    834\u001b[0m \u001b[43m        \u001b[49m\u001b[43massert_same_host\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    835\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    836\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mpool_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpool_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    837\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrelease_conn\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrelease_conn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    838\u001b[0m \u001b[43m        \u001b[49m\u001b[43mchunked\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mchunked\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    839\u001b[0m \u001b[43m        \u001b[49m\u001b[43mbody_pos\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mbody_pos\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    840\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mresponse_kw\u001b[49m\n\u001b[1;32m    841\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    843\u001b[0m \u001b[38;5;66;03m# Handle redirect?\u001b[39;00m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/urllib3/connectionpool.py:799\u001b[0m, in \u001b[0;36mHTTPConnectionPool.urlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m    797\u001b[0m     e \u001b[38;5;241m=\u001b[39m ProtocolError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mConnection aborted.\u001b[39m\u001b[38;5;124m\"\u001b[39m, e)\n\u001b[0;32m--> 799\u001b[0m retries \u001b[38;5;241m=\u001b[39m \u001b[43mretries\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mincrement\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    800\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43me\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_pool\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_stacktrace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msys\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexc_info\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m    801\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    802\u001b[0m retries\u001b[38;5;241m.\u001b[39msleep()\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/urllib3/util/retry.py:592\u001b[0m, in \u001b[0;36mRetry.increment\u001b[0;34m(self, method, url, response, error, _pool, _stacktrace)\u001b[0m\n\u001b[1;32m    591\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m new_retry\u001b[38;5;241m.\u001b[39mis_exhausted():\n\u001b[0;32m--> 592\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m MaxRetryError(_pool, url, error \u001b[38;5;129;01mor\u001b[39;00m ResponseError(cause))\n\u001b[1;32m    594\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIncremented Retry for (url=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m): \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m\"\u001b[39m, url, new_retry)\n",
+      "\u001b[0;31mMaxRetryError\u001b[0m: HTTPSConnectionPool(host='dagshub.com', port=443): Max retries exceeded with url: /AaronWChen/MeaLeon.mlflow/api/2.0/mlflow-artifacts/artifacts/ad83ec0a104a44b5a16da48605603245/2e72322335494f84af9ff7e7e44c3ff9/artifacts/sklearn_model/artifacts/transformed_recipes.pkl (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2396)')))",
+      "\nDuring handling of the above exception, another exception occurred:\n",
+      "\u001b[0;31mSSLError\u001b[0m                                  Traceback (most recent call last)",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/utils/rest_utils.py:99\u001b[0m, in \u001b[0;36mhttp_request\u001b[0;34m(host_creds, endpoint, method, max_retries, backoff_factor, extra_headers, retry_codes, timeout, raise_on_status, **kwargs)\u001b[0m\n\u001b[1;32m     98\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 99\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_get_http_response_with_retries\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    100\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    101\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    102\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmax_retries\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    103\u001b[0m \u001b[43m        \u001b[49m\u001b[43mbackoff_factor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    104\u001b[0m \u001b[43m        \u001b[49m\u001b[43mretry_codes\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    105\u001b[0m \u001b[43m        \u001b[49m\u001b[43mraise_on_status\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    106\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    107\u001b[0m \u001b[43m        \u001b[49m\u001b[43mverify\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhost_creds\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mverify\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    108\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    109\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    110\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    
111\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m requests\u001b[38;5;241m.\u001b[39mexceptions\u001b[38;5;241m.\u001b[39mTimeout \u001b[38;5;28;01mas\u001b[39;00m to:\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/utils/request_utils.py:151\u001b[0m, in \u001b[0;36m_get_http_response_with_retries\u001b[0;34m(method, url, max_retries, backoff_factor, retry_codes, raise_on_status, **kwargs)\u001b[0m\n\u001b[1;32m    150\u001b[0m session \u001b[38;5;241m=\u001b[39m _get_request_session(max_retries, backoff_factor, retry_codes, raise_on_status)\n\u001b[0;32m--> 151\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/requests/sessions.py:589\u001b[0m, in \u001b[0;36mSession.request\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m    588\u001b[0m send_kwargs\u001b[38;5;241m.\u001b[39mupdate(settings)\n\u001b[0;32m--> 589\u001b[0m resp \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mprep\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43msend_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    591\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m resp\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/requests/sessions.py:703\u001b[0m, in \u001b[0;36mSession.send\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m    702\u001b[0m \u001b[38;5;66;03m# Send the request\u001b[39;00m\n\u001b[0;32m--> 703\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43madapter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msend\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    705\u001b[0m \u001b[38;5;66;03m# Total elapsed time of the request (approximately)\u001b[39;00m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/requests/adapters.py:517\u001b[0m, in \u001b[0;36mHTTPAdapter.send\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m    515\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e\u001b[38;5;241m.\u001b[39mreason, _SSLError):\n\u001b[1;32m    516\u001b[0m     \u001b[38;5;66;03m# This branch is for urllib3 v1.22 and later.\u001b[39;00m\n\u001b[0;32m--> 517\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m SSLError(e, request\u001b[38;5;241m=\u001b[39mrequest)\n\u001b[1;32m    519\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mConnectionError\u001b[39;00m(e, request\u001b[38;5;241m=\u001b[39mrequest)\n",
+      "\u001b[0;31mSSLError\u001b[0m: HTTPSConnectionPool(host='dagshub.com', port=443): Max retries exceeded with url: /AaronWChen/MeaLeon.mlflow/api/2.0/mlflow-artifacts/artifacts/ad83ec0a104a44b5a16da48605603245/2e72322335494f84af9ff7e7e44c3ff9/artifacts/sklearn_model/artifacts/transformed_recipes.pkl (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2396)')))",
+      "\nDuring handling of the above exception, another exception occurred:\n",
+      "\u001b[0;31mMlflowException\u001b[0m                           Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[8], line 103\u001b[0m\n\u001b[1;32m     97\u001b[0m     pickle\u001b[38;5;241m.\u001b[39mdump(transformed_recipe, fo)\n\u001b[1;32m     99\u001b[0m \u001b[38;5;66;03m# with open(combined_df_path, 'wb') as fo:\u001b[39;00m\n\u001b[1;32m    100\u001b[0m \u001b[38;5;66;03m#     pickle.dump(combined_df, fo)\u001b[39;00m\n\u001b[0;32m--> 103\u001b[0m model_info \u001b[38;5;241m=\u001b[39m \u001b[43mmlflow\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpyfunc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog_model\u001b[49m\u001b[43m(\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m    104\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcode_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m../src/\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    105\u001b[0m \u001b[43m    \u001b[49m\u001b[43mpython_model\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mCustomSKLearnWrapper\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    106\u001b[0m \u001b[43m    \u001b[49m\u001b[43minput_example\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mwhole_nlp_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mingredients_lemmafied\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    107\u001b[0m \u001b[43m    \u001b[49m\u001b[43msignature\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msignature\u001b[49m\u001b[43m,\u001b[49m\u001b[43m        \u001b[49m\n\u001b[1;32m    108\u001b[0m \u001b[43m    \u001b[49m\u001b[43martifact_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msklearn_model\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    
109\u001b[0m \u001b[43m    \u001b[49m\u001b[43martifacts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43martifacts\u001b[49m\n\u001b[1;32m    110\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m \n\u001b[1;32m    112\u001b[0m \u001b[38;5;66;03m# since this uses a custom Stanza analyzer, we have to use a custom mlflow.Pyfunc.PythonModel\u001b[39;00m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/pyfunc/__init__.py:2116\u001b[0m, in \u001b[0;36mlog_model\u001b[0;34m(artifact_path, loader_module, data_path, code_path, conda_env, python_model, artifacts, registered_model_name, signature, input_example, await_registration_for, pip_requirements, extra_pip_requirements, metadata, model_config)\u001b[0m\n\u001b[1;32m   1949\u001b[0m \u001b[38;5;129m@format_docstring\u001b[39m(LOG_MODEL_PARAM_DOCS\u001b[38;5;241m.\u001b[39mformat(package_name\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscikit-learn\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m   1950\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlog_model\u001b[39m(\n\u001b[1;32m   1951\u001b[0m     artifact_path,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1965\u001b[0m     model_config\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m   1966\u001b[0m ):\n\u001b[1;32m   1967\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m   1968\u001b[0m \u001b[38;5;124;03m    Log a Pyfunc model with custom inference logic and optional data dependencies as an MLflow\u001b[39;00m\n\u001b[1;32m   1969\u001b[0m \u001b[38;5;124;03m    artifact for the current run.\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   2114\u001b[0m \u001b[38;5;124;03m             metadata of the logged model.\u001b[39;00m\n\u001b[1;32m   2115\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m-> 2116\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mModel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   2117\u001b[0m \u001b[43m        \u001b[49m\u001b[43martifact_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43martifact_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2118\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mflavor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmlflow\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpyfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2119\u001b[0m \u001b[43m        \u001b[49m\u001b[43mloader_module\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mloader_module\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2120\u001b[0m \u001b[43m        \u001b[49m\u001b[43mdata_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2121\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcode_path\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcode_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2122\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpython_model\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpython_model\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2123\u001b[0m \u001b[43m        \u001b[49m\u001b[43martifacts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43martifacts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2124\u001b[0m \u001b[43m        \u001b[49m\u001b[43mconda_env\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconda_env\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2125\u001b[0m \u001b[43m        \u001b[49m\u001b[43mregistered_model_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mregistered_model_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2126\u001b[0m \u001b[43m        \u001b[49m\u001b[43msignature\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msignature\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2127\u001b[0m \u001b[43m        \u001b[49m\u001b[43minput_example\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_example\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2128\u001b[0m \u001b[43m        \u001b[49m\u001b[43mawait_registration_for\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mawait_registration_for\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2129\u001b[0m \u001b[43m  
      \u001b[49m\u001b[43mpip_requirements\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpip_requirements\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2130\u001b[0m \u001b[43m        \u001b[49m\u001b[43mextra_pip_requirements\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_pip_requirements\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2131\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2132\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmodel_config\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   2133\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/models/model.py:620\u001b[0m, in \u001b[0;36mModel.log\u001b[0;34m(cls, artifact_path, flavor, registered_model_name, await_registration_for, metadata, **kwargs)\u001b[0m\n\u001b[1;32m    618\u001b[0m     _logger\u001b[38;5;241m.\u001b[39mwarning(_LOG_MODEL_MISSING_SIGNATURE_WARNING)\n\u001b[1;32m    619\u001b[0m flavor\u001b[38;5;241m.\u001b[39msave_model(path\u001b[38;5;241m=\u001b[39mlocal_path, mlflow_model\u001b[38;5;241m=\u001b[39mmlflow_model, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 620\u001b[0m \u001b[43mmlflow\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtracking\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfluent\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog_artifacts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlocal_path\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmlflow_model\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43martifact_path\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    621\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    622\u001b[0m     mlflow\u001b[38;5;241m.\u001b[39mtracking\u001b[38;5;241m.\u001b[39mfluent\u001b[38;5;241m.\u001b[39m_record_logged_model(mlflow_model)\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/tracking/fluent.py:1008\u001b[0m, in \u001b[0;36mlog_artifacts\u001b[0;34m(local_dir, artifact_path)\u001b[0m\n\u001b[1;32m    978\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    979\u001b[0m \u001b[38;5;124;03mLog all the contents of a local directory as artifacts of the run. If no run is active,\u001b[39;00m\n\u001b[1;32m    980\u001b[0m \u001b[38;5;124;03mthis method will create a new active run.\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1005\u001b[0m \u001b[38;5;124;03m        mlflow.log_artifacts(\"data\", artifact_path=\"states\")\u001b[39;00m\n\u001b[1;32m   1006\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m   1007\u001b[0m run_id \u001b[38;5;241m=\u001b[39m _get_or_start_run()\u001b[38;5;241m.\u001b[39minfo\u001b[38;5;241m.\u001b[39mrun_id\n\u001b[0;32m-> 1008\u001b[0m \u001b[43mMlflowClient\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog_artifacts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlocal_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43martifact_path\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/tracking/client.py:1188\u001b[0m, in \u001b[0;36mMlflowClient.log_artifacts\u001b[0;34m(self, run_id, local_dir, artifact_path)\u001b[0m\n\u001b[1;32m   1144\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlog_artifacts\u001b[39m(\n\u001b[1;32m   1145\u001b[0m     \u001b[38;5;28mself\u001b[39m, run_id: \u001b[38;5;28mstr\u001b[39m, local_dir: \u001b[38;5;28mstr\u001b[39m, artifact_path: Optional[\u001b[38;5;28mstr\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m   1146\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   1147\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m   1148\u001b[0m \u001b[38;5;124;03m    Write a directory of files to the remote ``artifact_uri``.\u001b[39;00m\n\u001b[1;32m   1149\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1186\u001b[0m \u001b[38;5;124;03m        is_dir: True\u001b[39;00m\n\u001b[1;32m   1187\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m-> 1188\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_tracking_client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog_artifacts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_id\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlocal_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43martifact_path\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/tracking/_tracking_service/client.py:538\u001b[0m, in \u001b[0;36mTrackingServiceClient.log_artifacts\u001b[0;34m(self, run_id, local_dir, artifact_path)\u001b[0m\n\u001b[1;32m    531\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mlog_artifacts\u001b[39m(\u001b[38;5;28mself\u001b[39m, run_id, local_dir, artifact_path\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m    532\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    533\u001b[0m \u001b[38;5;124;03m    Write a directory of files to the remote ``artifact_uri``.\u001b[39;00m\n\u001b[1;32m    534\u001b[0m \n\u001b[1;32m    535\u001b[0m \u001b[38;5;124;03m    :param local_dir: Path to the directory of files to write.\u001b[39;00m\n\u001b[1;32m    536\u001b[0m \u001b[38;5;124;03m    :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.\u001b[39;00m\n\u001b[1;32m    537\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 538\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_artifact_repo\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrun_id\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog_artifacts\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlocal_dir\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43martifact_path\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/store/artifact/http_artifact_repo.py:45\u001b[0m, in \u001b[0;36mHttpArtifactRepository.log_artifacts\u001b[0;34m(self, local_dir, artifact_path)\u001b[0m\n\u001b[1;32m     41\u001b[0m     artifact_dir \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m     42\u001b[0m         posixpath\u001b[38;5;241m.\u001b[39mjoin(artifact_path, rel_path) \u001b[38;5;28;01mif\u001b[39;00m artifact_path \u001b[38;5;28;01melse\u001b[39;00m rel_path\n\u001b[1;32m     43\u001b[0m     )\n\u001b[1;32m     44\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m f \u001b[38;5;129;01min\u001b[39;00m filenames:\n\u001b[0;32m---> 45\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlog_artifact\u001b[49m\u001b[43m(\u001b[49m\u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpath\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mjoin\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43martifact_dir\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/store/artifact/http_artifact_repo.py:28\u001b[0m, in \u001b[0;36mHttpArtifactRepository.log_artifact\u001b[0;34m(self, local_file, artifact_path)\u001b[0m\n\u001b[1;32m     26\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mContent-Type\u001b[39m\u001b[38;5;124m\"\u001b[39m: mime_type}\n\u001b[1;32m     27\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(local_file, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrb\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m---> 28\u001b[0m     resp \u001b[38;5;241m=\u001b[39m \u001b[43mhttp_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m     29\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_host_creds\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPUT\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\n\u001b[1;32m     30\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     31\u001b[0m     augmented_raise_for_status(resp)\n",
+      "File \u001b[0;32m~/Repos/Projects/MeaLeon/.venv/lib/python3.10/site-packages/mlflow/utils/rest_utils.py:120\u001b[0m, in \u001b[0;36mhttp_request\u001b[0;34m(host_creds, endpoint, method, max_retries, backoff_factor, extra_headers, retry_codes, timeout, raise_on_status, **kwargs)\u001b[0m\n\u001b[1;32m    118\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m InvalidUrlException(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid url: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01miu\u001b[39;00m\n\u001b[1;32m    119\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 120\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m MlflowException(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAPI request to \u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m failed with exception \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
+      "\u001b[0;31mMlflowException\u001b[0m: API request to https://dagshub.com/AaronWChen/MeaLeon.mlflow/api/2.0/mlflow-artifacts/artifacts/ad83ec0a104a44b5a16da48605603245/2e72322335494f84af9ff7e7e44c3ff9/artifacts/sklearn_model/artifacts/transformed_recipes.pkl failed with exception HTTPSConnectionPool(host='dagshub.com', port=443): Max retries exceeded with url: /AaronWChen/MeaLeon.mlflow/api/2.0/mlflow-artifacts/artifacts/ad83ec0a104a44b5a16da48605603245/2e72322335494f84af9ff7e7e44c3ff9/artifacts/sklearn_model/artifacts/transformed_recipes.pkl (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:2396)')))"
+     ]
+    }
+   ],
+   "source": [
+    "# Fit a binary CountVectorizer on the lemmatized ingredients and log it to the\n",
+    "# DagsHub-hosted MLflow server as a custom pyfunc model.\n",
+    "# Expects DAGSHUB_USER_NAME, mlflow_exp_id, whole_nlp_df, sklearn_transformer_path,\n",
+    "# transformed_recipes_path and artifacts to be defined by earlier cells.\n",
+    "mlflow_client = mlflow.tracking.MlflowClient(\n",
+    "    tracking_uri=f'https://dagshub.com/{DAGSHUB_USER_NAME}/MeaLeon.mlflow')\n",
+    "\n",
+    "# sklearn_transformer_params are keyword arguments for the sklearn CountVectorizer\n",
+    "sklearn_transformer_params = {\n",
+    "    'analyzer': CustomSKLearnAnalyzer().ngram_maker(\n",
+    "        min_ngram_length=1,\n",
+    "        max_ngram_length=4,\n",
+    "        ),\n",
+    "    'min_df':3,\n",
+    "    'binary':True\n",
+    "}\n",
+    "\n",
+    "# pipeline_params are parameters that will be logged in MLFlow and are a superset of library parameters\n",
+    "pipeline_params = {\n",
+    "    'stanza_model': 'en',\n",
+    "    'sklearn-transformer': 'OHE'\n",
+    "}\n",
+    "\n",
+    "# update the pipeline parameters with the library-specific ones so that they show up in MLflow Tracking\n",
+    "pipeline_params.update(sklearn_transformer_params)\n",
+    "\n",
+    "with mlflow.start_run(experiment_id=mlflow_exp_id):\n",
+    "    # LOG PARAMETERS\n",
+    "    mlflow.log_params(pipeline_params)\n",
+    "\n",
+    "    # LOG INPUTS (QUERIES) AND OUTPUTS\n",
+    "    # MLflow example uses a list of strings or a list of str->str dicts\n",
+    "    # Will be useful in STAGING/Evaluation\n",
+    "\n",
+    "    # LOG MODEL\n",
+    "    # CountVectorizer with binary=True (set above) records presence/absence of each\n",
+    "    # n-gram instead of a count, i.e. a one-hot style encoding (logged as 'OHE').\n",
+    "    sklearn_transformer = CountVectorizer(**sklearn_transformer_params)\n",
+    "\n",
+    "    print('\\n')\n",
+    "    print('-' * 80)\n",
+    "    print('sklearn fit transform on ingredients:', end='\\n')\n",
+    "\n",
+    "    model_input = whole_nlp_df['ingredients_lemmafied']\n",
+    "\n",
+    "    print('\\n')\n",
+    "    print('-' * 80)\n",
+    "    print('Input Data: ', end='\\n')\n",
+    "    print(model_input)\n",
+    "\n",
+    "    print('\\n')\n",
+    "    print('-' * 80)\n",
+    "    print('Input Data Shape: ', end='\\n')\n",
+    "    print(model_input.shape)\n",
+    "\n",
+    "    print('\\n')\n",
+    "    print('-' * 80)\n",
+    "    print('Random 3 Records from Input Data: ', end='\\n')\n",
+    "    print(model_input.sample(3, random_state=200))\n",
+    "\n",
+    "    # Do fit transform on data (tqdm only adds a progress bar over the records)\n",
+    "    response = sklearn_transformer.fit_transform(tqdm(model_input))\n",
+    "\n",
+    "    # Densify into a labelled frame: one column per learned n-gram, rows keyed like the input\n",
+    "    transformed_recipe = pd.DataFrame(\n",
+    "            response.toarray(),\n",
+    "            columns=sklearn_transformer.get_feature_names_out(),\n",
+    "            index=model_input.index\n",
+    "    )\n",
+    "\n",
+    "    signature = infer_signature(model_input=model_input,\n",
+    "                                model_output=transformed_recipe\n",
+    "                                )\n",
+    "\n",
+    "    print('\\n')\n",
+    "    print('-' * 80)\n",
+    "    print('Transformed Data:', end='\\n')\n",
+    "    print(transformed_recipe.head())\n",
+    "\n",
+    "    # Persist the fitted transformer and the transformed matrix to the paths set up earlier\n",
+    "    with open(sklearn_transformer_path, \"wb\") as fo:\n",
+    "        pickle.dump(sklearn_transformer, fo)\n",
+    "\n",
+    "    with open(transformed_recipes_path, \"wb\") as fo:\n",
+    "        pickle.dump(transformed_recipe, fo)\n",
+    "\n",
+    "    # since this uses a custom Stanza analyzer, we have to use a custom mlflow.Pyfunc.PythonModel\n",
+    "    # .iloc[0] takes the first record by position; plain [0] on a Series is a label\n",
+    "    # lookup and fails when the preprocessed index does not contain the label 0.\n",
+    "    # NOTE(review): the stored output of this cell shows the upload of\n",
+    "    # transformed_recipes.pkl failing with an SSLEOFError; presumably the dense pickle\n",
+    "    # is too large for the artifact endpoint -- confirm before re-running.\n",
+    "    model_info = mlflow.pyfunc.log_model(\n",
+    "        code_path=[\"../src/\"],\n",
+    "        python_model=CustomSKLearnWrapper(),\n",
+    "        input_example=model_input.iloc[0],\n",
+    "        signature=signature,\n",
+    "        artifact_path=\"sklearn_model\",\n",
+    "        artifacts=artifacts\n",
+    "        )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Preprocess the raw recipes; whole_nlp_df is the cleaned dataframe\n",
+    "whole_nlp_df = dfpp.preprocess_dataframe(raw_df)\n",
+    "print('\\n')\n",
+    "print('--------------')\n",
+    "print('Preprocessed Dataframe:', end='\\n')\n",
+    "print(whole_nlp_df.head())\n",
+    "print(whole_nlp_df.shape)\n",
+    "\n",
+    "# sklearn_transformer_params are keyword arguments for the sklearn TfidfVectorizer\n",
+    "sklearn_transformer_params = {\n",
+    "    'analyzer': CustomSKLearnAnalyzer().ngram_maker(\n",
+    "        min_ngram_length=1,\n",
+    "        max_ngram_length=4,\n",
+    "        ),\n",
+    "    'min_df':3,\n",
+    "}\n",
+    "\n",
+    "sklearn_transformer = TfidfVectorizer(**sklearn_transformer_params)\n",
+    "\n",
+    "model_input = whole_nlp_df['ingredients_lemmafied']\n",
+    "\n",
+    "# Do fit transform on data; the timestamps printed before and after bracket the fit\n",
+    "print(\"fit_transform start: \" + str(datetime.now()))\n",
+    "response = sklearn_transformer.fit_transform(tqdm(model_input))\n",
+    "print(\"fit_transform end: \" + str(datetime.now()))\n",
+    "\n",
+    "# NOTE(review): toarray() materializes the whole sparse TF-IDF matrix as a dense array,\n",
+    "# which is presumably what exhausts memory/disk on a laptop; consider\n",
+    "# pd.DataFrame.sparse.from_spmatrix(response, ...) if downstream code can take sparse columns.\n",
+    "transformed_recipe = pd.DataFrame(\n",
+    "        response.toarray(),\n",
+    "        columns=sklearn_transformer.get_feature_names_out(),\n",
+    "        index=model_input.index\n",
+    ")\n",
+    "\n",
+    "# Side-by-side frame: TF-IDF feature columns first, then the preprocessed recipe columns\n",
+    "combined_df = pd.concat([transformed_recipe, whole_nlp_df], axis=1)\n",
+    "\n",
+    "# joblib.dump needs the object to persist and its destination; calling it with no\n",
+    "# arguments raises TypeError and leaves an empty file behind.\n",
+    "with open(\"../joblib/2024.03.19/combined_df.joblib\", 'wb') as fo:\n",
+    "    joblib.dump(combined_df, fo)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "response"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_predictor = mlflow.pyfunc.load_model(model_uri=model_info.model_uri)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run the test recipes through the same preprocessing used for the training data\n",
+    "pre_proc_test_df = dfpp.preprocess_dataframe(test_df)\n",
+    "print('\\n')\n",
+    "print('--------------')\n",
+    "print('Preprocessed Dataframe: ', end='\\n')\n",
+    "print(pre_proc_test_df.head())\n",
+    "print(pre_proc_test_df.shape)\n",
+    "\n",
+    "# Use the same column the transformer was fitted on and the model signature was\n",
+    "# inferred from ('ingredients_lemmafied'); the raw 'ingredients' column does not\n",
+    "# match what the fitted vectorizer and the logged pyfunc model were given at fit time.\n",
+    "test_model_input = pre_proc_test_df['ingredients_lemmafied']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_model_input"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_model_input.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_model_input.values"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_info.signature.to_dict()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_predictor.predict(test_model_input)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Show the test input three ways: the full series, its shape, and a fixed-seed sample.\n",
+    "# sep='\\n' puts every argument on its own line, reproducing the blank lines, the\n",
+    "# 80-dash rule and the heading that separate print() calls would emit.\n",
+    "print('\\n', '-' * 80, 'Input Data: ', test_model_input, sep='\\n')\n",
+    "print('\\n', '-' * 80, 'Input Data Shape: ', test_model_input.shape, sep='\\n')\n",
+    "print(\n",
+    "    '\\n',\n",
+    "    '-' * 80,\n",
+    "    'Random 3 Records from Input Data: ',\n",
+    "    test_model_input.sample(3, random_state=200),\n",
+    "    sep='\\n',\n",
+    ")\n",
+    "\n",
+    "# Vectorize the test recipes with the already-fitted transformer\n",
+    "# (a tqdm-wrapped variant of this call was tried previously and is not used).\n",
+    "test_response = sklearn_transformer.transform(test_model_input)\n",
+    "\n",
+    "# Dense, labelled view of the result: one column per learned feature, rows keyed like the input\n",
+    "test_transformed_recipe = pd.DataFrame(\n",
+    "    data=test_response.toarray(),\n",
+    "    index=test_model_input.index,\n",
+    "    columns=sklearn_transformer.get_feature_names_out(),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "type(test_predictor)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_transformed_recipe"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "python3",
+   "language": "python",
+   "name": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}