diff --git a/notebooks/ts_for_contextual_bandit.ipynb b/notebooks/ts_for_contextual_bandit.ipynb
index e9b1164..a546d06 100644
--- a/notebooks/ts_for_contextual_bandit.ipynb
+++ b/notebooks/ts_for_contextual_bandit.ipynb
@@ -3308,64 +3308,72 @@
     "class OnlineLogisticRegression:\n",
     "    \n",
     "    # initializing\n",
-    "    def __init__(self, lambda_, alpha, n_dim):\n",
-    "        \n",
-    "        # the only hyperparameter is the deviation on the prior (L2 regularizer)\n",
-    "        self.lambda_ = lambda_; self.alpha = alpha\n",
-    "        \n",
-    "        # initializing parameters of the model\n",
-    "        self.n_dim = n_dim, \n",
-    "        self.m = np.zeros(self.n_dim)\n",
-    "        self.q = np.ones(self.n_dim) * self.lambda_\n",
-    "        \n",
-    "        # initializing weights\n",
-    "        self.w = np.random.normal(self.m, self.alpha * (self.q)**(-1.0), size = self.n_dim)\n",
+    "    def __init__(self, lambda_, alpha, n_dim, intercept=False):\n",
+    "        self.intercept = intercept\n",
+    "        self.lambda_ = lambda_\n",
+    "        self.alpha = alpha\n",
+    "        self.n_dim = n_dim\n",
+    "        self.m = np.zeros(self.n_dim + 1 if intercept else self.n_dim)\n",
+    "        self.q = np.ones((self.n_dim + 1 if intercept else self.n_dim)) * self.lambda_\n",
+    "        self.w = np.random.normal(self.m, self.alpha * self.q ** (-1.0),\n",
+    "                                   size=(self.n_dim + 1 if intercept else self.n_dim))\n",
     "    \n",
+    "    @staticmethod\n",
+    "    def expand_dim(var_):\n",
+    "        if np.ndim(var_) == 1:\n",
+    "            var_ = np.expand_dims(var_, 1)\n",
+    "        ones = np.ones(shape=(var_.shape[0], 1))\n",
+    "        exp_var_ = np.concatenate([ones, var_], axis=1)\n",
+    "        return exp_var_\n",
+    "\n",
     "    # the loss function\n",
     "    def loss(self, w, *args):\n",
     "        X, y = args\n",
-    "        return 0.5 * (self.q * (w - self.m)).dot(w - self.m) + np.sum([np.log(1 + np.exp(-y[j] * w.dot(X[j]))) for j in range(y.shape[0])])\n",
+    "        loss_ = 0.5 * (self.q * (w - self.m)).dot(w - self.m) + np.mean(\n",
+    "            [np.log(1 + np.exp(-y[j] * w.dot(X[j]) + 1e-7)) for j in range(y.shape[0])])\n",
+    "        return loss_\n",
     "    \n",
     "    # the gradient\n",
     "    def grad(self, w, *args):\n",
     "        X, y = args\n",
-    "        return self.q * (w - self.m) + (-1) * np.array([y[j] * X[j] / (1. + np.exp(y[j] * w.dot(X[j]))) for j in range(y.shape[0])]).sum(axis=0)\n",
+    "        grad_ = self.q * (w - self.m) + (-1) * np.array(\n",
+    "            [y[j] * X[j] / (1. + np.exp(y[j] * w.dot(X[j]) + 1e-7)) for j in range(y.shape[0])]).sum(axis=0)\n",
+    "        return grad_\n",
     "    \n",
     "    # method for sampling weights\n",
     "    def get_weights(self):\n",
-    "        return np.random.normal(self.m, self.alpha * (self.q)**(-1.0), size = self.n_dim)\n",
+    "        return np.random.normal(self.m, self.alpha * self.q ** (-1.0),\n",
+    "                                 size=(self.n_dim + 1 if self.intercept else self.n_dim))\n",
     "    \n",
     "    # fitting method\n",
     "    def fit(self, X, y):\n",
-    "        \n",
-    "        # step 1, find w\n",
-    "        self.w = minimize(self.loss, self.w, args=(X, y), jac=self.grad, method=\"L-BFGS-B\", options={'maxiter': 20, 'disp':True}).x\n",
+    "        if self.intercept:\n",
+    "            X = self.expand_dim(X)\n",
+    "        self.w = minimize(self.loss, self.w, args=(X, y), jac=self.grad, method=\"CG\",\n",
+    "                          options={'maxiter': 100, 'disp': False}).x\n",
     "        self.m = self.w\n",
-    "        \n",
-    "        # step 2, update q\n",
-    "        P = (1 + np.exp(-1*X.dot(self.m))) ** (-1)\n",
-    "        self.q = self.q + (P*(1-P)).dot(X ** 2)\n",
+    "        P = (1 + np.exp(-1 * X.dot(self.m))) ** (-1)\n",
+    "        self.q = self.q + (P * (1 - P)).dot(X ** 2)\n",
+    "        return\n",
     "    \n",
     "    # probability output method, using weights sample\n",
     "    def predict_proba(self, X, mode='sample'):\n",
-    "        \n",
-    "        # adding intercept to X\n",
-    "        #X = add_constant(X)\n",
-    "        \n",
-    "        # sampling weights after update\n",
+    "        if self.intercept:\n",
+    "            X = self.expand_dim(X)\n",
     "        self.w = self.get_weights()\n",
-    "        \n",
+    "\n",
     "        # using weight depending on mode\n",
     "        if mode == 'sample':\n",
-    "            w = self.w # weights are samples of posteriors\n",
+    "            w = self.w  # weights are samples of posteriors\n",
     "        elif mode == 'expected':\n",
-    "            w = self.m # weights are expected values of posteriors\n",
+    "            w = self.m  # weights are expected values of posteriors\n",
     "        else:\n",
     "            raise Exception('mode not recognized!')\n",
-    "        \n",
+    "\n",
     "        # calculating probabilities\n",
     "        proba = 1 / (1 + np.exp(-1 * X.dot(w)))\n",
-    "        return np.array([1-proba , proba]).T"
+    "        return np.array([1-proba , proba]).T\n",
+    "    "
    ]
   },
   {
@@ -13203,4 +13211,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
\ No newline at end of file
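
A minimal usage sketch of the patched class follows, for reference. It assumes the OnlineLogisticRegression definition from the diff above is already in scope, along with the notebook's imports (numpy as np, scipy.optimize.minimize); the toy data, seed, and hyperparameter values are illustrative assumptions, not part of the patch.

# Illustrative sketch only -- assumes the patched OnlineLogisticRegression
# class above is already defined in the session.
import numpy as np
from scipy.optimize import minimize  # used internally by fit()

np.random.seed(42)

# Toy data with labels in {-1, +1}, matching the y[j] * w.dot(X[j])
# convention used in loss() and grad().
X = np.random.normal(size=(100, 2))
y = np.where(X.dot(np.array([1.5, -0.5])) + 0.3 > 0, 1, -1)

# lambda_ sets the prior precision (the L2 strength), alpha scales the
# variance of the posterior weight samples, and intercept=True triggers
# expand_dim() so a bias column is prepended to X.
olr = OnlineLogisticRegression(lambda_=1.0, alpha=1.0, n_dim=2, intercept=True)
olr.fit(X, y)

# 'sample' draws weights from the posterior (the Thompson sampling path);
# 'expected' uses the posterior mean instead. Each call returns a
# (n_samples, 2) array of [P(y=-1), P(y=+1)] per row.
print(olr.predict_proba(X[:5], mode='sample'))
print(olr.predict_proba(X[:5], mode='expected'))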