diff --git a/docs/examples/example_estimating_ates.ipynb b/docs/examples/example_estimating_ates.ipynb index 0eda7327..4cbd597c 100644 --- a/docs/examples/example_estimating_ates.ipynb +++ b/docs/examples/example_estimating_ates.ipynb @@ -20,7 +20,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -99,9 +99,20 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2.083595103597918, 0.06526671583747883)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "naive_lm = smf.ols(f\"{outcome_column} ~ {treatment_column}\", df).fit(cov_type=\"HC1\")\n", "naive_est = naive_lm.params.iloc[1], naive_lm.bse.iloc[1]\n", @@ -110,9 +121,20 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2.1433722387308025, 0.06345124983351998)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "covaradjust_lm = smf.ols(f\"{outcome_column} ~ {treatment_column}+{'+'.join(feature_columns)}\",\n", " df).fit(cov_type=\"HC1\")\n", @@ -138,9 +160,42 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "from metalearners import DRLearner\n", "from lightgbm import LGBMRegressor, LGBMClassifier\n", @@ -149,9 +204,26 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 6, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([1.02931589]), array([0.06679633]))" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "metalearners_dr = DRLearner(\n", " nuisance_model_factory=LGBMRegressor,\n", @@ -558,9 +630,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "dev", "language": "python", - "name": "python3" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -572,7 +644,10 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.11.7" + }, + "mystnb": { + "execution_timeout": 60 } }, "nbformat": 4, diff --git a/docs/examples/example_sparse_inputs.ipynb b/docs/examples/example_sparse_inputs.ipynb index 04677eca..50c797b3 100644 --- a/docs/examples/example_sparse_inputs.ipynb +++ b/docs/examples/example_sparse_inputs.ipynb @@ -20,9 +20,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "import time, psutil, os, gc\n", "import numpy as np\n", @@ -43,7 +76,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": 
[], "source": [ @@ -71,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -141,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -154,9 +187,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Sparse data memory: 7.63MB\n", + "Dense data memory: 953.66MB\n" + ] + } + ], "source": [ "print(f\"\\nSparse data memory: {X_csr.data.nbytes / 1024 / 1024:.2f}MB\")\n", "print(f\"Dense data memory: {X_np.nbytes / 1024 / 1024:.2f}MB\")" @@ -171,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -207,15 +250,6 @@ " print(metalearners_est)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gc.collect()" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -225,12 +259,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sparse data - Runtime: 13.27s, Memory used: 345.56MB\n", + "(array([1.0007226]), array([0.02021719]))\n" + ] + } + ], "source": [ - "fit_drlearner_wrapper(X_csr)\n", - "gc.collect()" + "fit_drlearner_wrapper(X_csr)" ] }, { @@ -242,12 +284,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sparse data - Runtime: 149.10s, Memory used: 70.43MB\n", + "(array([1.00067664]), array([0.02021555]))\n" + ] + } + ], "source": [ - "fit_drlearner_wrapper(X_np)\n", - "gc.collect()" + "fit_drlearner_wrapper(X_np)" ] }, { @@ -256,147 +306,6 @@ "source": [ "In this (admittedly somewhat contrived) example, we that solving the DRLearner problem with sparse inputs takes around 1/8 of the time compared to dense inputs at the cost of some more memory usage in estimation. " ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Prediction \n", - "\n", - "These benefits aren't limited to causal inference. We can also use sparse matrices for prediction tasks as well." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def generate_dummy_data(n_samples=100000, n_categories=1000, n_features=20):\n", - " X = np.random.randint(0, n_categories, size=(n_samples, n_features))\n", - " y = np.zeros(n_samples)\n", - " # Select a few features for main effects\n", - " main_effect_features = np.random.choice(n_features, 3, replace=False)\n", - " # Create main effects\n", - " for i in main_effect_features:\n", - " # Create a random effect for each category\n", - " category_effects = np.random.normal(0, 1, n_categories)\n", - " y += category_effects[X[:, i]]\n", - " # Select a couple of feature pairs for interaction effects\n", - " interaction_pairs = [\n", - " (i, j) for i in range(n_features) for j in range(i + 1, n_features)\n", - " ]\n", - " selected_interactions = np.random.choice(len(interaction_pairs), 2, replace=False)\n", - " # Create interaction effects\n", - " for idx in selected_interactions:\n", - " i, j = interaction_pairs[idx]\n", - " # Create a sparse interaction effect\n", - " interaction_effect = np.random.choice(\n", - " [-1, 0, 1], size=(n_categories, n_categories), p=[0.05, 0.9, 0.05]\n", - " )\n", - " y += interaction_effect[X[:, i], X[:, j]]\n", - " # Add a non-linear effect for one feature\n", - " nonlinear_feature = np.random.choice(n_features)\n", - " y += np.square(X[:, nonlinear_feature] / (n_categories / 2) - 1)\n", - " y = (y - np.mean(y)) / np.std(y)\n", - " y += np.random.normal(0, 0.1, n_samples)\n", - "\n", - " return X, y" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def prepare_data(X):\n", - " e1 = OneHotEncoder(sparse_output=True)\n", - " # dense - use pd.get_dummies to mimic current practice\n", - " Xdf = pd.DataFrame(X)\n", - " return e1.fit_transform(X), pd.get_dummies(Xdf, columns=Xdf.columns).values\n", - "\n", - "def fit_and_measure(X_train, y_train, X_test, y_test):\n", - " start_memory = get_memory_usage()\n", - " start_time = time.time()\n", - " m = LGBMRegressor(n_estimators=100, max_depth=5, learning_rate=0.1, verbose=-1)\n", - " m.fit(X_train, y_train)\n", - " end_time = time.time()\n", - " end_memory = get_memory_usage()\n", - " runtime = end_time - start_time\n", - " memory_used = end_memory - start_memory\n", - "\n", - " # Compute accuracy metrics\n", - " y_pred = m.predict(X_test)\n", - " mse = mean_squared_error(y_test, y_pred)\n", - " r2 = r2_score(y_test, y_pred)\n", - "\n", - " return runtime, memory_used, mse, r2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X, y = generate_dummy_data()\n", - "# Split the data into train and test sets\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", - "X_train_sparse, X_train_dense = prepare_data(X_train)\n", - "X_test_sparse, X_test_dense = prepare_data(X_test)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "sparse_runtime, sparse_memory, sparse_mse, sparse_r2 = fit_and_measure(X_train_sparse, y_train, X_test_sparse, y_test)\n", - "gc.collect()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%%time\n", - "dense_runtime, dense_memory, dense_mse, dense_r2 = fit_and_measure(X_train_dense, y_train, X_test_dense, y_test)\n", - "gc.collect()" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Mypy can't find these names/variables since they are assigned to via cell-magic.\n", - "print(\n", - " f\"Sparse data - Runtime: {sparse_runtime:.2f}s, \" # type: ignore[name-defined]\n", - " f\"Memory used: {sparse_memory:.2f}MB, \" # type: ignore[name-defined]\n", - " f\"MSE: {sparse_mse:.4f}, R2: {sparse_r2:.4f}\" # type: ignore[name-defined]\n", - ") \n", - "print(\n", - " f\"Dense data - Runtime: {dense_runtime:.2f}s, \" # type: ignore[name-defined]\n", - " f\"Memory used: {dense_memory:.2f}MB, \" # type: ignore[name-defined]\n", - " f\"MSE: {dense_mse:.4f}, R2: {dense_r2:.4f}\" # type: ignore[name-defined]\n", - ")\n", - "\n", - "print(f\"\\nSparse data memory: {X_train_sparse.data.nbytes / 1024 / 1024:.2f}MB\")\n", - "print(f\"Dense data memory: {X_train_dense.nbytes / 1024 / 1024:.2f}MB\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -415,7 +324,10 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.11.7" + }, + "mystnb": { + "execution_timeout": 120 } }, "nbformat": 4,