From e2adaf4e91799a02f95e9af55b8c5aae8279146e Mon Sep 17 00:00:00 2001 From: Hector Lira Date: Wed, 8 Feb 2023 13:17:52 -0600 Subject: [PATCH 01/12] Add test notebook --- src/fklearn/causal/validation/ci_test.ipynb | 677 ++++++++++++++++++++ 1 file changed, 677 insertions(+) create mode 100644 src/fklearn/causal/validation/ci_test.ipynb diff --git a/src/fklearn/causal/validation/ci_test.ipynb b/src/fklearn/causal/validation/ci_test.ipynb new file mode 100644 index 00000000..f7d0ff27 --- /dev/null +++ b/src/fklearn/causal/validation/ci_test.ipynb @@ -0,0 +1,677 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from fklearn.causal.effects import linear_effect\n", + "from fklearn.causal.validation.curves import cumulative_effect_curve" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from typing import List\n", + "\n", + "from toolz import curry\n", + "from fklearn.types import EffectFnType" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(dict(\n", + " t=[1, 1, 1, 2, 2, 2, 3, 3, 3],\n", + " x=[1, 2, 3, 1, 2, 3, 1, 2, 3],\n", + " y=[1, 1, 1, 2, 3, 4, 3, 5, 7],\n", + "))\n", + "\n", + "result = cumulative_effect_curve(df, prediction=\"x\", outcome=\"y\", treatment=\"t\", min_rows=3, steps=df.shape[0],\n", + " effect_fn=linear_effect)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([3. , 3. , 2.92857143, 2.5 , 2.5 ,\n", + " 2.46153846, 2. ])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "# def linear_ci(df, y, t, z=1.96):\n", + "# n = df.shape[0]\n", + "# t_bar = df[t].mean()\n", + "# beta1 = linear_effect(df, y, t)\n", + "# beta0 = df[y].mean() - beta1 * t_bar\n", + "# e = df[y] - (beta0 + beta1*df[t])\n", + "# se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[t]-t_bar)**2))\n", + "# return np.array([beta1 - z*se, beta1 + z*se])\n", + "\n", + "def linear_ci(df, t, y, z=1.96):\n", + " n = df.shape[0]\n", + " t_bar = df[t].mean()\n", + " beta1 = linear_effect(df, t, y)\n", + " beta0 = df[y].mean() - beta1 * t_bar\n", + " e = df[y] - (beta0 + beta1*df[t])\n", + " se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[t]-t_bar)**2))\n", + " print(f\"\"\"\n", + " n: {n}\n", + " t_bar: {t_bar}\n", + " beta1: {beta1}\n", + " beta0: {beta0}\n", + " \"\"\")\n", + " return z*se" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + " n: 3\n", + " t_bar: 2.0\n", + " beta1: 3.0\n", + " beta0: -2.0\n", + " \n" + ] + }, + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "linear_ci(ordered_df.head(3), \"t\", \"y\")" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
txy
2131
5234
8337
\n", + "
" + ], + "text/plain": [ + " t x y\n", + "2 1 3 1\n", + "5 2 3 4\n", + "8 3 3 7" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ordered_df = df.sort_values(by=\"x\", ascending=False)\n", + "ordered_df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [], + "source": [ + "@curry\n", + "def effect_curves(\n", + " df: pd.DataFrame,\n", + " treatment: str,\n", + " outcome: str,\n", + " prediction: str,\n", + " min_rows: int = 30,\n", + " steps: int = 100,\n", + " effect_fn: EffectFnType = linear_effect,\n", + " ci_fn = None,\n", + ") -> pd.DataFrame:\n", + "\n", + " size: int = df.shape[0]\n", + " n_rows: List[int] = list(range(min_rows, size, size // steps)) + [size]\n", + "\n", + " cum_effect: np.ndarray = cumulative_effect_curve(\n", + " df=df,\n", + " treatment=treatment,\n", + " outcome=outcome,\n", + " prediction=prediction,\n", + " min_rows=min_rows,\n", + " steps=steps,\n", + " effect_fn=effect_fn,\n", + " )\n", + " ate: float = cum_effect[-1]\n", + " \n", + " effect_curves = pd.DataFrame({\"samples_count\": n_rows, \"cumulative_effect_curve\": cum_effect}).assign(\n", + " samples_fraction=lambda x: x[\"samples_count\"] / size,\n", + " cumulative_gain_curve=lambda x: x[\"samples_fraction\"] * x[\"cumulative_effect_curve\"],\n", + " random_model_cumulative_gain_curve=lambda x: x[\"samples_fraction\"] * ate,\n", + " relative_cumulative_gain_curve=lambda x: (\n", + " x[\"samples_fraction\"] * x[\"cumulative_effect_curve\"] - x[\"random_model_cumulative_gain_curve\"]\n", + " ),\n", + " )\n", + " \n", + " if ci_fn is not None:\n", + " \n", + " # crear un wrapper de linear_ci parecido a cumulative_effect_curve (wrapper) y effect_fn (similitud con linear_ci)\n", + " # el wrapper debe de solo arrojar un arreglo (igual que cumulative_effect_curve)\n", + " ordered_df = df.sort_values(prediction, ascending=False).reset_index(drop=True)\n", + " print(ordered_df.head(n_rows[0]), n_rows[0])\n", + " effect_errors = np.array([ci_fn(ordered_df.head(rows), treatment, outcome) for rows in n_rows])\n", + " \n", + " effect_curves = effect_curves.assign(\n", + " cumulative_effect_curve_error=effect_errors,\n", + " cumulative_gain_curve_error=lambda x: x[\"samples_fraction\"] * x[\"cumulative_effect_curve_error\"],\n", + " )\n", + "\n", + " return effect_curves\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(dict(\n", + " t=[1, 1, 1, 2, 2, 2, 3, 3, 3],\n", + " x=[1, 2, 3, 1, 2, 3, 1, 2, 3],\n", + " y=[1, 1, 1, 2, 3, 4, 3, 5, 7],\n", + "))" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " t x y\n", + "0 1 3 1\n", + "1 2 3 4\n", + "2 3 3 7 3\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
samples_countcumulative_effect_curvesamples_fractioncumulative_gain_curverandom_model_cumulative_gain_curverelative_cumulative_gain_curvecumulative_effect_curve_errorcumulative_gain_curve_error
033.0000000.3333331.0000000.6666670.3333330.0000000.000000
143.0000000.4444441.3333330.8888890.4444440.0000000.000000
252.9285710.5555561.6269841.1111110.5158730.5994440.333025
362.5000000.6666671.6666671.3333330.3333330.7747580.516505
472.5000000.7777781.9444441.5555560.3888890.6288550.489110
582.4615380.8888892.1880341.7777780.4102560.7654830.680429
692.0000001.0000002.0000002.0000000.0000000.9563820.956382
\n", + "
" + ], + "text/plain": [ + " samples_count cumulative_effect_curve samples_fraction \\\n", + "0 3 3.000000 0.333333 \n", + "1 4 3.000000 0.444444 \n", + "2 5 2.928571 0.555556 \n", + "3 6 2.500000 0.666667 \n", + "4 7 2.500000 0.777778 \n", + "5 8 2.461538 0.888889 \n", + "6 9 2.000000 1.000000 \n", + "\n", + " cumulative_gain_curve random_model_cumulative_gain_curve \\\n", + "0 1.000000 0.666667 \n", + "1 1.333333 0.888889 \n", + "2 1.626984 1.111111 \n", + "3 1.666667 1.333333 \n", + "4 1.944444 1.555556 \n", + "5 2.188034 1.777778 \n", + "6 2.000000 2.000000 \n", + "\n", + " relative_cumulative_gain_curve cumulative_effect_curve_error \\\n", + "0 0.333333 0.000000 \n", + "1 0.444444 0.000000 \n", + "2 0.515873 0.599444 \n", + "3 0.333333 0.774758 \n", + "4 0.388889 0.628855 \n", + "5 0.410256 0.765483 \n", + "6 0.000000 0.956382 \n", + "\n", + " cumulative_gain_curve_error \n", + "0 0.000000 \n", + "1 0.000000 \n", + "2 0.333025 \n", + "3 0.516505 \n", + "4 0.489110 \n", + "5 0.680429 \n", + "6 0.956382 " + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "effect_curves(\n", + " df=df,\n", + " treatment=\"t\",\n", + " outcome=\"y\",\n", + " prediction=\"x\",\n", + " min_rows = 3,\n", + " steps = df.shape[0],\n", + " effect_fn = linear_effect,\n", + " ci_fn = linear_ci,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
txy
0111
3212
6313
1121
4223
7325
2131
5234
8337
\n", + "
" + ], + "text/plain": [ + " t x y\n", + "0 1 1 1\n", + "3 2 1 2\n", + "6 3 1 3\n", + "1 1 2 1\n", + "4 2 2 3\n", + "7 3 2 5\n", + "2 1 3 1\n", + "5 2 3 4\n", + "8 3 3 7" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.sort_values(by=\"x\", ascending=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0. , 0. , 0.59944419, 0.77475803, 0.62885517,\n", + " 0.76548284, 0.95638207])" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cumulative_elast_curve_ci(\n", + " dataset=df,\n", + " prediction=\"x\",\n", + " y=\"y\",\n", + " t=\"t\",\n", + " min_periods=3,\n", + " steps=df.shape[0]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "linear_effect(df, \"y\", \"t\")" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "def elast_ci(df, y, t, z=1.96):\n", + " n = df.shape[0]\n", + " t_bar = df[t].mean()\n", + "# beta1 = elast(df, y, t)\n", + " beta1 = linear_effect(df, t, y)\n", + " beta0 = df[y].mean() - beta1 * t_bar\n", + " e = df[y] - (beta0 + beta1*df[t])\n", + " se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[t]-t_bar)**2))\n", + "# return np.array([beta1 - z*se, beta1 + z*se])\n", + " return z*se\n", + "\n", + "@curry\n", + "def elast(data, y, t):\n", + " # line coeficient for the one variable linear regression\n", + " return (np.sum((data[t] - data[t].mean())*(data[y] - data[y].mean())) /\n", + " np.sum((data[t] - data[t].mean())**2))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "def cumulative_elast_curve_ci(dataset, prediction, y, t, min_periods=30, steps=100):\n", + " size = dataset.shape[0]\n", + " ordered_df = dataset.sort_values(prediction, ascending=False).reset_index(drop=True)\n", + " n_rows = list(range(min_periods, size, size // steps)) + [size]\n", + " \n", + " # just replacing a call to `elast` by a call to `elast_ci`\n", + " return np.array([elast_ci(ordered_df.head(rows), y, t) for rows in n_rows])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From ef19bd9ae8dfcdb20c7659aa27f4f6a5b26d5d0c Mon Sep 17 00:00:00 2001 From: Hector Lira Date: Tue, 21 Feb 2023 09:50:45 -0600 Subject: [PATCH 02/12] Modify prototype functions in notebook --- src/fklearn/causal/validation/ci_test.ipynb | 139 ++++++++++++-------- 1 file changed, 81 insertions(+), 58 deletions(-) diff --git a/src/fklearn/causal/validation/ci_test.ipynb b/src/fklearn/causal/validation/ci_test.ipynb index f7d0ff27..b8825c68 100644 --- a/src/fklearn/causal/validation/ci_test.ipynb +++ b/src/fklearn/causal/validation/ci_test.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -15,11 +15,11 @@ }, { "cell_type": "code", - 
"execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "from typing import List\n", + "from typing import Any, List\n", "\n", "from toolz import curry\n", "from fklearn.types import EffectFnType" @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -53,7 +53,7 @@ " 2.46153846, 2. ])" ] }, - "execution_count": 6, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -64,7 +64,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -84,39 +84,21 @@ " beta0 = df[y].mean() - beta1 * t_bar\n", " e = df[y] - (beta0 + beta1*df[t])\n", " se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[t]-t_bar)**2))\n", - " print(f\"\"\"\n", - " n: {n}\n", - " t_bar: {t_bar}\n", - " beta1: {beta1}\n", - " beta0: {beta0}\n", - " \"\"\")\n", " return z*se" ] }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 10, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - " n: 3\n", - " t_bar: 2.0\n", - " beta1: 3.0\n", - " beta0: -2.0\n", - " \n" - ] - }, { "data": { "text/plain": [ "0.0" ] }, - "execution_count": 87, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -127,7 +109,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -186,7 +168,7 @@ "8 3 3 7" ] }, - "execution_count": 82, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -198,14 +180,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "\n", + "def confidence_interval_curve(\n", + " df: pd.DataFrame,\n", + " treatment: str,\n", + " outcome: str,\n", + " prediction: str,\n", + " min_rows: int = 30,\n", + " steps: int = 100,\n", + " ci_fn: EffectFnType = linear_ci,\n", + " **kwargs,\n", + ") -> np.ndarray:\n", + " \n", + " size = df.shape[0]\n", + " ordered_df = df.sort_values(prediction, ascending=False).reset_index(drop=True)\n", + " n_rows = list(range(min_rows, size, size // steps)) + [size]\n", + " \n", + " return np.array([ci_fn(ordered_df.head(rows), treatment, outcome, **kwargs) for rows in n_rows])" + ] }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -218,7 +218,8 @@ " min_rows: int = 30,\n", " steps: int = 100,\n", " effect_fn: EffectFnType = linear_effect,\n", - " ci_fn = None,\n", + " ci_fn: EffectFnType = None,\n", + " **kwargs,\n", ") -> pd.DataFrame:\n", "\n", " size: int = df.shape[0]\n", @@ -246,11 +247,16 @@ " \n", " if ci_fn is not None:\n", " \n", - " # crear un wrapper de linear_ci parecido a cumulative_effect_curve (wrapper) y effect_fn (similitud con linear_ci)\n", - " # el wrapper debe de solo arrojar un arreglo (igual que cumulative_effect_curve)\n", - " ordered_df = df.sort_values(prediction, ascending=False).reset_index(drop=True)\n", - " print(ordered_df.head(n_rows[0]), n_rows[0])\n", - " effect_errors = np.array([ci_fn(ordered_df.head(rows), treatment, outcome) for rows in n_rows])\n", + " effect_errors: np.ndarray = confidence_interval_curve(\n", + " df=df,\n", + " treatment=treatment,\n", + " outcome=outcome,\n", + " prediction=prediction,\n", + " 
min_rows=min_rows,\n",
+ " steps=steps,\n",
+ " ci_fn=ci_fn,\n",
+ " **kwargs,\n",
+ " )\n",
 " \n",
 " effect_curves = effect_curves.assign(\n",
 " cumulative_effect_curve_error=effect_errors,\n",
@@ -262,7 +268,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 8,
+ "execution_count": 13,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -275,19 +281,9 @@
 },
 {
 "cell_type": "code",
- "execution_count": 81,
+ "execution_count": 24,
 "metadata": {},
 "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " t x y\n",
- "0 1 3 1\n",
- "1 2 3 4\n",
- "2 3 3 7 3\n"
- ]
- },
 {
 "data": {
 "text/html": [
@@ -439,7 +435,7 @@
 "6 0.956382 "
 ]
 },
- "execution_count": 81,
+ "execution_count": 24,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -454,6 +450,7 @@
 " steps = df.shape[0],\n",
 " effect_fn = linear_effect,\n",
 " ci_fn = linear_ci,\n",
+ " z = 1.96\n",
 ")"
 ]
 },
@@ -571,7 +568,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 51,
+ "execution_count": 16,
 "metadata": {},
 "outputs": [
 {
@@ -581,7 +578,7 @@
 " 0.76548284, 0.95638207])"
 ]
 },
- "execution_count": 51,
+ "execution_count": 16,
 "metadata": {},
 "output_type": "execute_result"
 }
@@ -599,16 +596,27 @@
 },
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
 "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.35294117647058826"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
 "source": [
 "linear_effect(df, \"y\", \"t\")"
 ]
 },
 {
 "cell_type": "code",
- "execution_count": 50,
+ "execution_count": 14,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -632,7 +640,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 34,
+ "execution_count": 18,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -650,7 +658,22 @@
 "execution_count": null,
 "metadata": {},
 "outputs": [],
- "source": []
+ "source": [
+ "# next tasks:\n",
+ "# - Add new arguments to the functions and function documentation\n",
+ "# - Modify cumulative effect curves with the new changes\n",
+ "# - Create a separate \"confidence intervals/errors\" file (effects.py) and put linear_ci there\n",
+ "# - Create a separate \"curves\" file (curves.py) and put confidence_interval_curve there\n",
+ "# - Create a new variable type (?, ErrorFnType) with the same signature as EffectFnType\n",
+ "# - Add tests with the examples from this notebook\n",
+ "# - Update the documentation index\n",
+ "# - Open PR\n",
+ "# - add comments:\n",
+ "# 1. discussion of the function types: they have the same signature, can we think of something more generic?\n",
+ "# 2. confidence_interval_curve and cumulative_effect_curve do the same thing, can we think of something more generic?\n",
+ "# 3. how to address the weakness that the curves and the errors are arrays that have to be the same size.\n",
+ "# Right now we are hard-coding it, but is there a better way to deal with this?"
+ ]
 }
 ],
 "metadata": {
 "kernelspec": {
 "display_name": "Python 3",
 "language": "python",
 "name": "python3"
 },
 "language_info": {
 "codemirror_mode": {
 "name": "ipython",
 "version": 3
 },
 "file_extension": ".py",
 "mimetype": "text/x-python",
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
 "version": "3.7.7"
 }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

From c5adc625cfd2e2ca83c6a64bdf28a9c5c633862c Mon Sep 17 00:00:00 2001 From: Hector Lira Date: Tue, 11 Apr 2023 09:27:22 -0600 Subject: [PATCH 03/12] Modify effect_curves to add confidence intervals --- src/fklearn/causal/validation/ci_test.ipynb | 12 ++++----- src/fklearn/causal/validation/curves.py | 30 +++++++++++++++++++-- src/fklearn/types/types.py | 3 +++ 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/src/fklearn/causal/validation/ci_test.ipynb b/src/fklearn/causal/validation/ci_test.ipynb
index b8825c68..60eff310 100644
--- a/src/fklearn/causal/validation/ci_test.ipynb
+++ b/src/fklearn/causal/validation/ci_test.ipynb
@@ -77,14 +77,14 @@
 "# se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[t]-t_bar)**2))\n",
 "# return np.array([beta1 - z*se, beta1 + z*se])\n",
 "\n",
- "def linear_ci(df, t, y, z=1.96):\n",
+ "def linear_ci(df, t, y):\n",
 " n = df.shape[0]\n",
 " t_bar = df[t].mean()\n",
 " beta1 = linear_effect(df, t, y)\n",
 " beta0 = df[y].mean() - beta1 * t_bar\n",
 " e = df[y] - (beta0 + beta1*df[t])\n",
 " se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[t]-t_bar)**2))\n",
- " return z*se"
+ " return se"
 ]
 },
@@ -192,7 +192,7 @@
 " prediction: str,\n",
 " min_rows: int = 30,\n",
 " steps: int = 100,\n",
- " ci_fn: EffectFnType = linear_ci,\n",
+ " error_fn: EffectFnType = linear_standard_error,\n",
 " **kwargs,\n",
 ") -> np.ndarray:\n",
 " \n",
@@ -200,7 +200,7 @@
 " ordered_df = df.sort_values(prediction, ascending=False).reset_index(drop=True)\n",
 " n_rows = list(range(min_rows, size, size // steps)) + [size]\n",
 " \n",
- " return np.array([ci_fn(ordered_df.head(rows), treatment, outcome, **kwargs) for rows in n_rows])"
+ " return np.array([error_fn(ordered_df.head(rows), treatment, outcome, **kwargs) for rows in n_rows])"
 ]
 },
@@ -218,7 +218,7 @@
 " min_rows: int = 30,\n",
 " steps: int = 100,\n",
 " effect_fn: EffectFnType = linear_effect,\n",
- " ci_fn: EffectFnType = None,\n",
+ " error_fn: EffectFnType = None,\n",
 " **kwargs,\n",
 ") -> pd.DataFrame:\n",
 "\n",
@@ -254,7 +254,7 @@
 " prediction=prediction,\n",
 " min_rows=min_rows,\n",
 " steps=steps,\n",
- " ci_fn=ci_fn,\n",
+ " error_fn=error_fn,\n",
 " **kwargs,\n",
 " )\n",
 " \n",
diff --git a/src/fklearn/causal/validation/curves.py b/src/fklearn/causal/validation/curves.py
index f3852479..eb1bd03f 100644
--- a/src/fklearn/causal/validation/curves.py
+++ b/src/fklearn/causal/validation/curves.py
@@ -4,7 +4,7 @@
 import pandas as pd
 from toolz import curry, partial
 
-from fklearn.types import EffectFnType
+from fklearn.types import EffectErrorFnType, EffectFnType
 from fklearn.causal.effects import linear_effect
 
 
@@ -215,6 +215,7 @@
     min_rows: int = 30,
     steps: int = 100,
     effect_fn: EffectFnType = linear_effect,
+    error_fn: EffectErrorFnType = None,
 ) -> pd.DataFrame:
     """
     Creates a dataset summarizing the effect curves: cumulative effect, cumulative gain and
@@ -247,6 +248,11 @@
         A function that computes the treatment effect given a dataframe, the name of the treatment column and
         the name of the outcome column.
 
+    error_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> float or Array of float
+        A function that computes the standard error given a dataframe, the name of the treatment column and the name
+        of the outcome column. Standard error must be multiplied by a quantile to get the upper and lower bounds of
+        a confidence interval. 
+
 
     Returns
     ----------
@@ -268,7 +274,7 @@
     )
     ate: float = cum_effect[-1]
 
-    return pd.DataFrame({"samples_count": n_rows, "cumulative_effect_curve": cum_effect}).assign(
+    effect_curves_df = pd.DataFrame({"samples_count": n_rows, "cumulative_effect_curve": cum_effect}).assign(
         samples_fraction=lambda x: x["samples_count"] / size,
         cumulative_gain_curve=lambda x: x["samples_fraction"] * x["cumulative_effect_curve"],
         random_model_cumulative_gain_curve=lambda x: x["samples_fraction"] * ate,
@@ -276,3 +282,23 @@
             x["samples_fraction"] * x["cumulative_effect_curve"] - x["random_model_cumulative_gain_curve"]
         ),
     )
+
+    if error_fn is not None:
+
+        effect_errors: np.ndarray = confidence_interval_curve(
+            df=df,
+            treatment=treatment,
+            outcome=outcome,
+            prediction=prediction,
+            min_rows=min_rows,
+            steps=steps,
+            error_fn=error_fn,
+            **kwargs,
+        )
+
+        effect_curves_df = effect_curves_df.assign(
+            cumulative_effect_curve_error=effect_errors,
+            cumulative_gain_curve_error=lambda x: x["samples_fraction"] * x["cumulative_effect_curve_error"],
+        )
+
+    return effect_curves_df
\ No newline at end of file
diff --git a/src/fklearn/types/types.py b/src/fklearn/types/types.py
index 31e9e3d7..4f775a90 100644
--- a/src/fklearn/types/types.py
+++ b/src/fklearn/types/types.py
@@ -41,3 +41,6 @@
 
 # Effect Functions
 EffectFnType = Callable[[pd.DataFrame, str, str], float]
+
+# Effect Error Functions
+EffectErrorFnType = Callable[[pd.DataFrame, str, str], float]
\ No newline at end of file

From fe3033caa8851a5e7be7966d08fff37abe29119d Mon Sep 17 00:00:00 2001 From: MarianaBlaz Date: Tue, 18 Apr 2023 09:17:37 -0600 Subject: [PATCH 04/12] adds the linear standard error function in a new file for standard errors --- src/fklearn/causal/standard_errors.py | 36 +++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 src/fklearn/causal/standard_errors.py

diff --git a/src/fklearn/causal/standard_errors.py b/src/fklearn/causal/standard_errors.py
new file mode 100644
index 00000000..6e707839
--- /dev/null
+++ b/src/fklearn/causal/standard_errors.py
@@ -0,0 +1,36 @@
+import numpy as np
+import pandas as pd
+from fklearn.causal.effects import linear_effect
+
+def linear_standard_error(df: pd.DatFrame, treatment: str, outcome: str):
+    """
+    Linear Standard Error
+
+    Returns a Float: the linear standard error of a linear regression
+    of the outcome as a function of the treatment.
+
+    Parameters
+    ----------
+
+    df : Pandas DataFrame
+        A Pandas' DataFrame with treatment, outcome and confounder columns
+
+    treatment : str
+        The name of the column in `df` with the treatment.
+
+    outcome : str
+        The name of the column in `df` with the outcome.
+
+    Returns
+    ----------
+    se : Float
+        A Float of the linear standard error extracted by using the formula for
+        the simple linear regression. 
+ """ + n = df.shape[0] + t_bar = df[treatment].mean() + beta1 = linear_effect(df, treatment, outcome) + beta0 = df[outcome].mean() - beta1 * t_bar + e = df[outcome] - (beta0 + beta1*df[treatment]) + se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[treatment]-t_bar)**2)) + return se \ No newline at end of file From 906bdf4cc50bc50a910b5a0ba4578b590273373a Mon Sep 17 00:00:00 2001 From: MarianaBlaz Date: Tue, 18 Apr 2023 09:38:15 -0600 Subject: [PATCH 05/12] changed name of file from standard_error to statistical_errors --- src/fklearn/causal/{standard_errors.py => statistical_errors.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/fklearn/causal/{standard_errors.py => statistical_errors.py} (100%) diff --git a/src/fklearn/causal/standard_errors.py b/src/fklearn/causal/statistical_errors.py similarity index 100% rename from src/fklearn/causal/standard_errors.py rename to src/fklearn/causal/statistical_errors.py From 863ebb0734825cb2b5386f0cce331222be8130bf Mon Sep 17 00:00:00 2001 From: MarianaBlaz Date: Tue, 18 Apr 2023 09:38:49 -0600 Subject: [PATCH 06/12] includes definition of cumulative error and adds it into the effect curves --- src/fklearn/causal/validation/curves.py | 69 ++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/src/fklearn/causal/validation/curves.py b/src/fklearn/causal/validation/curves.py index eb1bd03f..aa59f447 100644 --- a/src/fklearn/causal/validation/curves.py +++ b/src/fklearn/causal/validation/curves.py @@ -6,6 +6,7 @@ from fklearn.types import EffectErrorFnType, EffectFnType from fklearn.causal.effects import linear_effect +from fklearn.causal.statistical_errors import linear_standard_error @curry @@ -206,6 +207,58 @@ def relative_cumulative_gain_curve(df: pd.DataFrame, return np.array([(effect - ate) * (rows / size) for rows, effect in zip(n_rows, cum_effect)]) + +def cumulative_statistical_error_curve( + df: pd.DataFrame, + treatment: str, + outcome: str, + prediction: str, + min_rows: int = 30, + steps: int = 100, + error_fn: EffectFnType = linear_standard_error) -> np.ndarray: + + """ + Orders the dataset by prediction and computes the cumulative error curve according + to that ordering. The function to compute the error is given by error_fn. + + Parameters + ---------- + df : Pandas' DataFrame + A Pandas' DataFrame with target and prediction scores. + + treatment : Strings + The name of the treatment column in `df`. + + outcome : Strings + The name of the outcome column in `df`. + + prediction : Strings + The name of the prediction column in `df`. + + min_rows : Integer + Minimum number of observations needed to have a valid result. + + steps : Integer + The number of cumulative steps to iterate when accumulating the effect + + error_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> float + A function that computes the statistical error of the regression of the treatment effect + over the outcome given a dataframe, the name of the treatment column and the name + of the outcome column. + + + Returns + ---------- + cumulative statistical error curve: Numpy's Array + The cumulative error according to the predictions ordering. 
+ """ + + size = df.shape[0] + ordered_df = df.sort_values(prediction, ascending=False).reset_index(drop=True) + n_rows = list(range(min_rows, size, size // steps)) + [size] + + return np.array([error_fn(ordered_df.head(rows), treatment, outcome) for rows in n_rows]) + @curry def effect_curves( df: pd.DataFrame, @@ -224,6 +277,11 @@ def effect_curves( Moreover one column indicating the cumulative gain for a corresponding random model is also included as a benchmark. + It is also possible to include a cumulative error function by passing an error_fn, this + column is useful to include a confidence interval, which can be achieved by multiplying the + error column by a desired quantile. + + Parameters ---------- df : Pandas' DataFrame @@ -248,9 +306,9 @@ def effect_curves( A function that computes the treatment effect given a dataframe, the name of the treatment column and the name of the outcome column. - error_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> float or Array of float - A function that computes the standard error given a dataframe, the name of the treatment column and the name - of the outcome column. Standard error must be multiplied by a quantile to get the upper and lower bounds of + error_fn : function (df: pandas.DataFrame, treatment: str, outcome: str) -> float + A function that computes the statistical error given a dataframe, the name of the treatment column and the + name of the outcome column. The error must be multiplied by a quantile to get the upper and lower bounds of a confidence interval. @@ -285,15 +343,14 @@ def effect_curves( if error_fn is not None: - effect_errors: np.ndarray = confidence_interval_curve( + effect_errors: np.ndarray = cumulative_statistical_error_curve( df=df, treatment=treatment, outcome=outcome, prediction=prediction, min_rows=min_rows, steps=steps, - error_fn=error_fn, - **kwargs, + error_fn=error_fn ) effect_curves_df = effect_curves_df.assign( From d70c110c1eb22909215f15e6996907510bc6ee63 Mon Sep 17 00:00:00 2001 From: Hector Lira Date: Thu, 11 May 2023 09:09:37 -0600 Subject: [PATCH 07/12] Adds unit tests --- tests/causal/validation/test_curves.py | 5 ++++- .../validation/test_statistical_errors.py | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 tests/causal/validation/test_statistical_errors.py diff --git a/tests/causal/validation/test_curves.py b/tests/causal/validation/test_curves.py index 98e6cc74..9fe40225 100644 --- a/tests/causal/validation/test_curves.py +++ b/tests/causal/validation/test_curves.py @@ -4,6 +4,7 @@ from fklearn.causal.effects import linear_effect from fklearn.causal.validation.curves import (effect_by_segment, cumulative_effect_curve, cumulative_gain_curve, relative_cumulative_gain_curve, effect_curves) +from fklearn.causal.validation.statistical_errors import linear_standard_error def test_effect_by_segment(): @@ -83,9 +84,11 @@ def test_effect_curves(): "cumulative_gain_curve": [1., 1.33333333, 1.62698413, 1.66666667, 1.94444444, 2.18803419, 2.], "random_model_cumulative_gain_curve": [0.6666666, 0.8888888, 1.1111111, 1.3333333, 1.5555555, 1.7777777, 2.], "relative_cumulative_gain_curve": [0.33333333, 0.44444444, 0.51587302, 0.33333333, 0.38888889, 0.41025641, 0.], + "cumulative_effect_curve_error": [0.0 , 0.0 , 0.30583887, 0.39528471, 0.32084447, 0.39055247, 0.48795004], + "cumulative_gain_curve_error": [0.0, 0.0, 0.16991048, 0.26352313, 0.24954570, 0.34715774, 0.48795003], }) result = effect_curves(df, prediction="x", outcome="y", 
treatment="t", min_rows=3, steps=df.shape[0], - effect_fn=linear_effect) + effect_fn=linear_effect, error_fn=linear_standard_error) pd.testing.assert_frame_equal(result, expected, atol=1e-07) diff --git a/tests/causal/validation/test_statistical_errors.py b/tests/causal/validation/test_statistical_errors.py new file mode 100644 index 00000000..3c4df073 --- /dev/null +++ b/tests/causal/validation/test_statistical_errors.py @@ -0,0 +1,18 @@ +import numpy as np +import pandas as pd + +from fklearn.causal.validation.statistical_errors import linear_standard_error + + +def test_linear_standard_error(): + + df = pd.DataFrame(dict( + t=[1, 1, 1, 2, 2, 2, 3, 3, 3], + x=[1, 2, 3, 1, 2, 3, 1, 2, 3], + y=[1, 1, 1, 2, 3, 4, 3, 5, 7], + )) + + result = linear_standard_error(df, treatment="t", outcome="y") + expected = 0.48795003647426655 + + np.testing.assert_array_almost_equal(result, expected, decimal=4) From c08fef8317d5d07087a69856b3422217b0d6bbfa Mon Sep 17 00:00:00 2001 From: Hector Lira Date: Thu, 11 May 2023 09:30:07 -0600 Subject: [PATCH 08/12] Fixes unit tests --- src/fklearn/causal/statistical_errors.py | 2 +- tests/causal/validation/test_curves.py | 2 +- tests/causal/validation/test_statistical_errors.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/fklearn/causal/statistical_errors.py b/src/fklearn/causal/statistical_errors.py index 6e707839..9e1726d9 100644 --- a/src/fklearn/causal/statistical_errors.py +++ b/src/fklearn/causal/statistical_errors.py @@ -2,7 +2,7 @@ import pandas as pd from fklearn.causal.effects import linear_effect -def linear_standard_error(df: pd.DatFrame, treatment: str, outcome: str): +def linear_standard_error(df: pd.DataFrame, treatment: str, outcome: str): """ Linear Standard Error diff --git a/tests/causal/validation/test_curves.py b/tests/causal/validation/test_curves.py index 9fe40225..1537739c 100644 --- a/tests/causal/validation/test_curves.py +++ b/tests/causal/validation/test_curves.py @@ -4,7 +4,7 @@ from fklearn.causal.effects import linear_effect from fklearn.causal.validation.curves import (effect_by_segment, cumulative_effect_curve, cumulative_gain_curve, relative_cumulative_gain_curve, effect_curves) -from fklearn.causal.validation.statistical_errors import linear_standard_error +from fklearn.causal.statistical_errors import linear_standard_error def test_effect_by_segment(): diff --git a/tests/causal/validation/test_statistical_errors.py b/tests/causal/validation/test_statistical_errors.py index 3c4df073..2429c5f4 100644 --- a/tests/causal/validation/test_statistical_errors.py +++ b/tests/causal/validation/test_statistical_errors.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd -from fklearn.causal.validation.statistical_errors import linear_standard_error +from fklearn.causal.statistical_errors import linear_standard_error def test_linear_standard_error(): From 553676218bd667fda1d270fb8154f063b864934f Mon Sep 17 00:00:00 2001 From: Hector Lira Date: Thu, 11 May 2023 09:46:14 -0600 Subject: [PATCH 09/12] Fixes code style --- docs/source/api/fklearn.causal.rst | 8 ++++++++ src/fklearn/causal/statistical_errors.py | 8 ++++---- src/fklearn/causal/validation/curves.py | 8 ++++---- tests/causal/validation/test_curves.py | 2 +- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/docs/source/api/fklearn.causal.rst b/docs/source/api/fklearn.causal.rst index 88b6b7ab..9d67308b 100644 --- a/docs/source/api/fklearn.causal.rst +++ b/docs/source/api/fklearn.causal.rst @@ -27,6 +27,14 @@ fklearn.causal.effects module 
:undoc-members:
    :show-inheritance:
 
+fklearn.causal.statistical_errors module
+----------------------------------------
+
+.. automodule:: fklearn.causal.statistical_errors
+    :members:
+    :undoc-members:
+    :show-inheritance:
+
 Module contents
 ---------------
 
diff --git a/src/fklearn/causal/statistical_errors.py b/src/fklearn/causal/statistical_errors.py
index 9e1726d9..69d3a03b 100644
--- a/src/fklearn/causal/statistical_errors.py
+++ b/src/fklearn/causal/statistical_errors.py
@@ -2,7 +2,7 @@
 import pandas as pd
 from fklearn.causal.effects import linear_effect
 
-def linear_standard_error(df: pd.DataFrame, treatment: str, outcome: str):
+def linear_standard_error(df: pd.DataFrame, treatment: str, outcome: str) -> float:
     """
     Linear Standard Error
 
@@ -31,6 +31,6 @@
     t_bar = df[treatment].mean()
     beta1 = linear_effect(df, treatment, outcome)
     beta0 = df[outcome].mean() - beta1 * t_bar
-    e = df[outcome] - (beta0 + beta1*df[treatment])
-    se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[treatment]-t_bar)**2))
-    return se
\ No newline at end of file
+    e = df[outcome] - (beta0 + beta1 * df[treatment])
+    se = np.sqrt(((1 / (n - 2)) * np.sum(e**2)) / np.sum((df[treatment] - t_bar)**2))
+    return se
diff --git a/src/fklearn/causal/validation/curves.py b/src/fklearn/causal/validation/curves.py
index aa59f447..14ae42b4 100644
--- a/src/fklearn/causal/validation/curves.py
+++ b/src/fklearn/causal/validation/curves.py
@@ -207,7 +207,6 @@ def relative_cumulative_gain_curve(df: pd.DataFrame,
     return np.array([(effect - ate) * (rows / size) for rows, effect in zip(n_rows, cum_effect)])
 
-
 def cumulative_statistical_error_curve(
     df: pd.DataFrame,
     treatment: str,
@@ -256,9 +255,10 @@ def cumulative_statistical_error_curve(
     size = df.shape[0]
     ordered_df = df.sort_values(prediction, ascending=False).reset_index(drop=True)
     n_rows = list(range(min_rows, size, size // steps)) + [size]
-    
+
     return np.array([error_fn(ordered_df.head(rows), treatment, outcome) for rows in n_rows])
 
+
 @curry
 def effect_curves(
     df: pd.DataFrame,
@@ -269,7 +269,7 @@ def effect_curves(
     steps: int = 100,
     effect_fn: EffectFnType = linear_effect,
     error_fn: EffectErrorFnType = None,
-) -> pd.DataFrame:
+    ) -> pd.DataFrame:
     """
     Creates a dataset summarizing the effect curves: cumulative effect, cumulative gain and
     relative cumulative gain. 
The dataset also contains two columns referencing the data @@ -358,4 +358,4 @@ def effect_curves( cumulative_gain_curve_error=lambda x: x["samples_fraction"] * x["cumulative_effect_curve_error"], ) - return effect_curves_df \ No newline at end of file + return effect_curves_df diff --git a/tests/causal/validation/test_curves.py b/tests/causal/validation/test_curves.py index 1537739c..19ed373c 100644 --- a/tests/causal/validation/test_curves.py +++ b/tests/causal/validation/test_curves.py @@ -84,7 +84,7 @@ def test_effect_curves(): "cumulative_gain_curve": [1., 1.33333333, 1.62698413, 1.66666667, 1.94444444, 2.18803419, 2.], "random_model_cumulative_gain_curve": [0.6666666, 0.8888888, 1.1111111, 1.3333333, 1.5555555, 1.7777777, 2.], "relative_cumulative_gain_curve": [0.33333333, 0.44444444, 0.51587302, 0.33333333, 0.38888889, 0.41025641, 0.], - "cumulative_effect_curve_error": [0.0 , 0.0 , 0.30583887, 0.39528471, 0.32084447, 0.39055247, 0.48795004], + "cumulative_effect_curve_error": [0.0, 0.0, 0.30583887, 0.39528471, 0.32084447, 0.39055247, 0.48795004], "cumulative_gain_curve_error": [0.0, 0.0, 0.16991048, 0.26352313, 0.24954570, 0.34715774, 0.48795003], }) From 3db1f327cf4cab760ed7a6cbbcdef50414446774 Mon Sep 17 00:00:00 2001 From: Hector Lira Date: Thu, 11 May 2023 09:50:20 -0600 Subject: [PATCH 10/12] Fixes code style in statistical_errors.py --- src/fklearn/causal/statistical_errors.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/fklearn/causal/statistical_errors.py b/src/fklearn/causal/statistical_errors.py index 69d3a03b..863d07dc 100644 --- a/src/fklearn/causal/statistical_errors.py +++ b/src/fklearn/causal/statistical_errors.py @@ -2,11 +2,12 @@ import pandas as pd from fklearn.causal.effects import linear_effect + def linear_standard_error(df: pd.DataFrame, treatment: str, outcome: str) -> float: """ Linear Standard Error - Returns a Float: the linear standard error of a linear regression + Returns a Float: the linear standard error of a linear regression of the outcome as a function of the treatment. Parameters @@ -24,9 +25,10 @@ def linear_standard_error(df: pd.DataFrame, treatment: str, outcome: str) -> flo Returns ---------- se : Float - A Float of the linear standard error extracted by using the formula for + A Float of the linear standard error extracted by using the formula for the simple linear regression. 
""" + n = df.shape[0] t_bar = df[treatment].mean() beta1 = linear_effect(df, treatment, outcome) From d785591e0f4fae37db6f306037aab04118c33cf8 Mon Sep 17 00:00:00 2001 From: MarianaBlaz Date: Thu, 11 May 2023 09:54:07 -0600 Subject: [PATCH 11/12] corrects style in curves.py --- src/fklearn/causal/validation/curves.py | 36 ++++++++++++------------- src/fklearn/types/types.py | 2 +- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/fklearn/causal/validation/curves.py b/src/fklearn/causal/validation/curves.py index 14ae42b4..f6a10f04 100644 --- a/src/fklearn/causal/validation/curves.py +++ b/src/fklearn/causal/validation/curves.py @@ -207,17 +207,17 @@ def relative_cumulative_gain_curve(df: pd.DataFrame, return np.array([(effect - ate) * (rows / size) for rows, effect in zip(n_rows, cum_effect)]) -def cumulative_statistical_error_curve( - df: pd.DataFrame, - treatment: str, - outcome: str, - prediction: str, - min_rows: int = 30, - steps: int = 100, - error_fn: EffectFnType = linear_standard_error) -> np.ndarray: +def cumulative_statistical_error_curve(df: pd.DataFrame, + treatment: str, + outcome: str, + prediction: str, + min_rows: int = 30, + steps: int = 100, + error_fn: EffectFnType = linear_standard_error, + ) -> np.ndarray: """ - Orders the dataset by prediction and computes the cumulative error curve according + Orders the dataset by prediction and computes the cumulative error curve according to that ordering. The function to compute the error is given by error_fn. Parameters @@ -260,16 +260,14 @@ def cumulative_statistical_error_curve( @curry -def effect_curves( - df: pd.DataFrame, - treatment: str, - outcome: str, - prediction: str, - min_rows: int = 30, - steps: int = 100, - effect_fn: EffectFnType = linear_effect, - error_fn: EffectErrorFnType = None, - ) -> pd.DataFrame: +def effect_curves(df: pd.DataFrame, + treatment: str, + outcome: str, + prediction: str, + min_rows: int = 30, + steps: int = 100, + effect_fn: EffectFnType = linear_effect, + error_fn: EffectErrorFnType = None) -> pd.DataFrame: """ Creates a dataset summarizing the effect curves: cumulative effect, cumulative gain and relative cumulative gain. 
The dataset also contains two columns referencing the data diff --git a/src/fklearn/types/types.py b/src/fklearn/types/types.py index 4f775a90..4e55d535 100644 --- a/src/fklearn/types/types.py +++ b/src/fklearn/types/types.py @@ -43,4 +43,4 @@ EffectFnType = Callable[[pd.DataFrame, str, str], float] # Effect Error Functions -EffectErrorFnType = Callable[[pd.DataFrame, str, str], float] \ No newline at end of file +EffectErrorFnType = Callable[[pd.DataFrame, str, str], float] From e5b5d4d880ba1a2e163afadb1dfc30bad4f7bec8 Mon Sep 17 00:00:00 2001 From: MarianaBlaz Date: Thu, 11 May 2023 10:53:42 -0600 Subject: [PATCH 12/12] removes notebook used for testing confidence interval curves --- src/fklearn/causal/validation/ci_test.ipynb | 700 -------------------- 1 file changed, 700 deletions(-) delete mode 100644 src/fklearn/causal/validation/ci_test.ipynb diff --git a/src/fklearn/causal/validation/ci_test.ipynb b/src/fklearn/causal/validation/ci_test.ipynb deleted file mode 100644 index 60eff310..00000000 --- a/src/fklearn/causal/validation/ci_test.ipynb +++ /dev/null @@ -1,700 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from fklearn.causal.effects import linear_effect\n", - "from fklearn.causal.validation.curves import cumulative_effect_curve" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Any, List\n", - "\n", - "from toolz import curry\n", - "from fklearn.types import EffectFnType" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame(dict(\n", - " t=[1, 1, 1, 2, 2, 2, 3, 3, 3],\n", - " x=[1, 2, 3, 1, 2, 3, 1, 2, 3],\n", - " y=[1, 1, 1, 2, 3, 4, 3, 5, 7],\n", - "))\n", - "\n", - "result = cumulative_effect_curve(df, prediction=\"x\", outcome=\"y\", treatment=\"t\", min_rows=3, steps=df.shape[0],\n", - " effect_fn=linear_effect)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([3. , 3. , 2.92857143, 2.5 , 2.5 ,\n", - " 2.46153846, 2. 
])" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "result" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# def linear_ci(df, y, t, z=1.96):\n", - "# n = df.shape[0]\n", - "# t_bar = df[t].mean()\n", - "# beta1 = linear_effect(df, y, t)\n", - "# beta0 = df[y].mean() - beta1 * t_bar\n", - "# e = df[y] - (beta0 + beta1*df[t])\n", - "# se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[t]-t_bar)**2))\n", - "# return np.array([beta1 - z*se, beta1 + z*se])\n", - "\n", - "def linear_ci(df, t, y):\n", - " n = df.shape[0]\n", - " t_bar = df[t].mean()\n", - " beta1 = linear_effect(df, t, y)\n", - " beta0 = df[y].mean() - beta1 * t_bar\n", - " e = df[y] - (beta0 + beta1*df[t])\n", - " se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[t]-t_bar)**2))\n", - " return se" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.0" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linear_ci(ordered_df.head(3), \"t\", \"y\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
txy
2131
5234
8337
\n", - "
" - ], - "text/plain": [ - " t x y\n", - "2 1 3 1\n", - "5 2 3 4\n", - "8 3 3 7" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ordered_df = df.sort_values(by=\"x\", ascending=False)\n", - "ordered_df.head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "def confidence_interval_curve(\n", - " df: pd.DataFrame,\n", - " treatment: str,\n", - " outcome: str,\n", - " prediction: str,\n", - " min_rows: int = 30,\n", - " steps: int = 100,\n", - " error_fn: EffectFnType = linear_standard_error,\n", - " **kwargs,\n", - ") -> np.ndarray:\n", - " \n", - " size = df.shape[0]\n", - " ordered_df = df.sort_values(prediction, ascending=False).reset_index(drop=True)\n", - " n_rows = list(range(min_rows, size, size // steps)) + [size]\n", - " \n", - " return np.array([error_fn(ordered_df.head(rows), treatment, outcome, **kwargs) for rows in n_rows])" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "@curry\n", - "def effect_curves(\n", - " df: pd.DataFrame,\n", - " treatment: str,\n", - " outcome: str,\n", - " prediction: str,\n", - " min_rows: int = 30,\n", - " steps: int = 100,\n", - " effect_fn: EffectFnType = linear_effect,\n", - " error_fn: EffectFnType = None,\n", - " **kwargs,\n", - ") -> pd.DataFrame:\n", - "\n", - " size: int = df.shape[0]\n", - " n_rows: List[int] = list(range(min_rows, size, size // steps)) + [size]\n", - "\n", - " cum_effect: np.ndarray = cumulative_effect_curve(\n", - " df=df,\n", - " treatment=treatment,\n", - " outcome=outcome,\n", - " prediction=prediction,\n", - " min_rows=min_rows,\n", - " steps=steps,\n", - " effect_fn=effect_fn,\n", - " )\n", - " ate: float = cum_effect[-1]\n", - " \n", - " effect_curves = pd.DataFrame({\"samples_count\": n_rows, \"cumulative_effect_curve\": cum_effect}).assign(\n", - " samples_fraction=lambda x: x[\"samples_count\"] / size,\n", - " cumulative_gain_curve=lambda x: x[\"samples_fraction\"] * x[\"cumulative_effect_curve\"],\n", - " random_model_cumulative_gain_curve=lambda x: x[\"samples_fraction\"] * ate,\n", - " relative_cumulative_gain_curve=lambda x: (\n", - " x[\"samples_fraction\"] * x[\"cumulative_effect_curve\"] - x[\"random_model_cumulative_gain_curve\"]\n", - " ),\n", - " )\n", - " \n", - " if ci_fn is not None:\n", - " \n", - " effect_errors: np.ndarray = confidence_interval_curve(\n", - " df=df,\n", - " treatment=treatment,\n", - " outcome=outcome,\n", - " prediction=prediction,\n", - " min_rows=min_rows,\n", - " steps=steps,\n", - " error_fn=error_fn,\n", - " **kwargs,\n", - " )\n", - " \n", - " effect_curves = effect_curves.assign(\n", - " cumulative_effect_curve_error=effect_errors,\n", - " cumulative_gain_curve_error=lambda x: x[\"samples_fraction\"] * x[\"cumulative_effect_curve_error\"],\n", - " )\n", - "\n", - " return effect_curves\n" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.DataFrame(dict(\n", - " t=[1, 1, 1, 2, 2, 2, 3, 3, 3],\n", - " x=[1, 2, 3, 1, 2, 3, 1, 2, 3],\n", - " y=[1, 1, 1, 2, 3, 4, 3, 5, 7],\n", - "))" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
samples_countcumulative_effect_curvesamples_fractioncumulative_gain_curverandom_model_cumulative_gain_curverelative_cumulative_gain_curvecumulative_effect_curve_errorcumulative_gain_curve_error
033.0000000.3333331.0000000.6666670.3333330.0000000.000000
143.0000000.4444441.3333330.8888890.4444440.0000000.000000
252.9285710.5555561.6269841.1111110.5158730.5994440.333025
362.5000000.6666671.6666671.3333330.3333330.7747580.516505
472.5000000.7777781.9444441.5555560.3888890.6288550.489110
582.4615380.8888892.1880341.7777780.4102560.7654830.680429
692.0000001.0000002.0000002.0000000.0000000.9563820.956382
\n", - "
" - ], - "text/plain": [ - " samples_count cumulative_effect_curve samples_fraction \\\n", - "0 3 3.000000 0.333333 \n", - "1 4 3.000000 0.444444 \n", - "2 5 2.928571 0.555556 \n", - "3 6 2.500000 0.666667 \n", - "4 7 2.500000 0.777778 \n", - "5 8 2.461538 0.888889 \n", - "6 9 2.000000 1.000000 \n", - "\n", - " cumulative_gain_curve random_model_cumulative_gain_curve \\\n", - "0 1.000000 0.666667 \n", - "1 1.333333 0.888889 \n", - "2 1.626984 1.111111 \n", - "3 1.666667 1.333333 \n", - "4 1.944444 1.555556 \n", - "5 2.188034 1.777778 \n", - "6 2.000000 2.000000 \n", - "\n", - " relative_cumulative_gain_curve cumulative_effect_curve_error \\\n", - "0 0.333333 0.000000 \n", - "1 0.444444 0.000000 \n", - "2 0.515873 0.599444 \n", - "3 0.333333 0.774758 \n", - "4 0.388889 0.628855 \n", - "5 0.410256 0.765483 \n", - "6 0.000000 0.956382 \n", - "\n", - " cumulative_gain_curve_error \n", - "0 0.000000 \n", - "1 0.000000 \n", - "2 0.333025 \n", - "3 0.516505 \n", - "4 0.489110 \n", - "5 0.680429 \n", - "6 0.956382 " - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "effect_curves(\n", - " df=df,\n", - " treatment=\"t\",\n", - " outcome=\"y\",\n", - " prediction=\"x\",\n", - " min_rows = 3,\n", - " steps = df.shape[0],\n", - " effect_fn = linear_effect,\n", - " ci_fn = linear_ci,\n", - " z = 1.96\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
txy
0111
3212
6313
1121
4223
7325
2131
5234
8337
\n", - "
" - ], - "text/plain": [ - " t x y\n", - "0 1 1 1\n", - "3 2 1 2\n", - "6 3 1 3\n", - "1 1 2 1\n", - "4 2 2 3\n", - "7 3 2 5\n", - "2 1 3 1\n", - "5 2 3 4\n", - "8 3 3 7" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.sort_values(by=\"x\", ascending=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0. , 0. , 0.59944419, 0.77475803, 0.62885517,\n", - " 0.76548284, 0.95638207])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cumulative_elast_curve_ci(\n", - " dataset=df,\n", - " prediction=\"x\",\n", - " y=\"y\",\n", - " t=\"t\",\n", - " min_periods=3,\n", - " steps=df.shape[0]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.35294117647058826" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "linear_effect(df, \"y\", \"t\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "def elast_ci(df, y, t, z=1.96):\n", - " n = df.shape[0]\n", - " t_bar = df[t].mean()\n", - "# beta1 = elast(df, y, t)\n", - " beta1 = linear_effect(df, t, y)\n", - " beta0 = df[y].mean() - beta1 * t_bar\n", - " e = df[y] - (beta0 + beta1*df[t])\n", - " se = np.sqrt(((1/(n-2))*np.sum(e**2))/np.sum((df[t]-t_bar)**2))\n", - "# return np.array([beta1 - z*se, beta1 + z*se])\n", - " return z*se\n", - "\n", - "@curry\n", - "def elast(data, y, t):\n", - " # line coeficient for the one variable linear regression\n", - " return (np.sum((data[t] - data[t].mean())*(data[y] - data[y].mean())) /\n", - " np.sum((data[t] - data[t].mean())**2))" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "def cumulative_elast_curve_ci(dataset, prediction, y, t, min_periods=30, steps=100):\n", - " size = dataset.shape[0]\n", - " ordered_df = dataset.sort_values(prediction, ascending=False).reset_index(drop=True)\n", - " n_rows = list(range(min_periods, size, size // steps)) + [size]\n", - " \n", - " # just replacing a call to `elast` by a call to `elast_ci`\n", - " return np.array([elast_ci(ordered_df.head(rows), y, t) for rows in n_rows])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# siguientes tasks:\n", - "# - Agregar nuevos argumentos a las funciones y documentación de las funciones\n", - "# - Modificar cumulative effect curves con nuevos cambios\n", - "# - Crear archivo aparte de \"confidence intervals/errors\" (effects.py) y ahí poner linear_ci\n", - "# - Crear archivo aparte de \"curves\" (curves.py) y ahí poner confidence_interval_curve\n", - "# - Crear un nuevo tipo de variable (?, ErrorFnType) con el mismo signature de EffectFnType\n", - "# - Agregar tests con los ejemplos de este notebook\n", - "# - Modificar índice de la documentación\n", - "# - Abrir PR\n", - "# - agregar comentarios:\n", - "# 1. discusión de los tipos de las funciones: tienen el mismo signature, podemos pensar en algo más genérico?\n", - "# 2. confidence_interval_curve y cumulative_effect_curve hacen lo mismo, podemos pensar en algo más genérico?\n", - "# 3. 
cómo atacar la vulnerabilidad de que las curvas y los errores son arreglos que tienen que ser del mismo tamaño.\n", - "# Ahorita lo estamos hard-coding pero existirá alguna mejor manera de lidiar con esto?" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.7" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}
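
A note on the statistic this series introduces. linear_standard_error fits the simple regression of the outcome on the treatment and returns the standard error of the fitted slope. In the notation of the committed code (slope beta1, intercept beta0, residuals e, treatment mean t_bar), the returned quantity is, written in LaTeX:

e_i = y_i - (\hat{\beta}_0 + \hat{\beta}_1 t_i), \qquad
\operatorname{se}(\hat{\beta}_1) = \sqrt{\frac{\frac{1}{n-2} \sum_{i=1}^{n} e_i^2}{\sum_{i=1}^{n} (t_i - \bar{t})^2}}

which is exactly the numpy expression np.sqrt(((1 / (n - 2)) * np.sum(e**2)) / np.sum((df[treatment] - t_bar)**2)) in statistical_errors.py. The n - 2 divisor reflects the two estimated parameters, intercept and slope.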
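
To sanity-check that formula independently of fklearn, here is a minimal, self-contained sketch using only numpy and pandas. The helper name ols_slope_standard_error is illustrative and not part of the library; the data frame is the toy example used throughout the series, and the printed value matches the 0.48795003647426655 asserted in test_statistical_errors.py.

import numpy as np
import pandas as pd


def ols_slope_standard_error(df: pd.DataFrame, treatment: str, outcome: str) -> float:
    # Illustrative re-implementation of the series' linear_standard_error:
    # fit outcome = beta0 + beta1 * treatment by OLS, then compute the
    # standard error of the slope from the residuals.
    n = df.shape[0]
    t_bar = df[treatment].mean()
    # OLS slope (the same algebra as fklearn's linear_effect)
    beta1 = (np.sum((df[treatment] - t_bar) * (df[outcome] - df[outcome].mean()))
             / np.sum((df[treatment] - t_bar) ** 2))
    beta0 = df[outcome].mean() - beta1 * t_bar
    e = df[outcome] - (beta0 + beta1 * df[treatment])  # regression residuals
    # n - 2 degrees of freedom: two parameters (intercept and slope) were fit
    return float(np.sqrt(((1 / (n - 2)) * np.sum(e ** 2)) / np.sum((df[treatment] - t_bar) ** 2)))


df = pd.DataFrame(dict(
    t=[1, 1, 1, 2, 2, 2, 3, 3, 3],
    x=[1, 2, 3, 1, 2, 3, 1, 2, 3],
    y=[1, 1, 1, 2, 3, 4, 3, 5, 7],
))

print(ols_slope_standard_error(df, treatment="t", outcome="y"))
# 0.48795003647426655, the value asserted in test_statistical_errors.py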
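
Finally, a sketch of how the new error columns are meant to be consumed, following the effect_curves docstring ("The error must be multiplied by a quantile to get the upper and lower bounds of a confidence interval"). This assumes a build of fklearn that includes the patches above; the 1.96 quantile mirrors the z = 1.96 used in the scratch notebook and corresponds to an approximate 95% normal interval.

import pandas as pd

from fklearn.causal.effects import linear_effect
from fklearn.causal.statistical_errors import linear_standard_error
from fklearn.causal.validation.curves import effect_curves

df = pd.DataFrame(dict(
    t=[1, 1, 1, 2, 2, 2, 3, 3, 3],
    x=[1, 2, 3, 1, 2, 3, 1, 2, 3],
    y=[1, 1, 1, 2, 3, 4, 3, 5, 7],
))

curves = effect_curves(df, treatment="t", outcome="y", prediction="x",
                       min_rows=3, steps=df.shape[0],
                       effect_fn=linear_effect, error_fn=linear_standard_error)

z = 1.96  # approximate 95% normal quantile, as in the scratch notebook
band = curves.assign(
    upper=lambda d: d["cumulative_effect_curve"] + z * d["cumulative_effect_curve_error"],
    lower=lambda d: d["cumulative_effect_curve"] - z * d["cumulative_effect_curve_error"],
)
print(band[["samples_count", "lower", "cumulative_effect_curve", "upper"]])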