diff --git a/notebooks/ts_for_contextual_bandit.ipynb b/notebooks/ts_for_contextual_bandit.ipynb
index e9b1164..a546d06 100644
--- a/notebooks/ts_for_contextual_bandit.ipynb
+++ b/notebooks/ts_for_contextual_bandit.ipynb
@@ -3308,64 +3308,72 @@
     "class OnlineLogisticRegression:\n",
     "    \n",
     "    # initializing\n",
-    "    def __init__(self, lambda_, alpha, n_dim):\n",
-    "        \n",
-    "        # the only hyperparameter is the deviation on the prior (L2 regularizer)\n",
-    "        self.lambda_ = lambda_; self.alpha = alpha\n",
-    "        \n",
-    "        # initializing parameters of the model\n",
-    "        self.n_dim = n_dim, \n",
-    "        self.m = np.zeros(self.n_dim)\n",
-    "        self.q = np.ones(self.n_dim) * self.lambda_\n",
-    "        \n",
-    "        # initializing weights\n",
-    "        self.w = np.random.normal(self.m, self.alpha * (self.q)**(-1.0), size = self.n_dim)\n",
+    "    def __init__(self, lambda_, alpha, n_dim, intercept=False):\n",
+    "        self.intercept = intercept\n",
+    "        self.lambda_ = lambda_\n",
+    "        self.alpha = alpha\n",
+    "        self.n_dim = n_dim\n",
+    "        self.m = np.zeros(self.n_dim + 1 if intercept else self.n_dim)\n",
+    "        self.q = np.ones((self.n_dim + 1 if intercept else self.n_dim)) * self.lambda_\n",
+    "        self.w = np.random.normal(self.m, self.alpha * self.q ** (-1.0),\n",
+    "                                   size=(self.n_dim + 1 if intercept else self.n_dim))\n",
     "    \n",
+    "    @staticmethod\n",
+    "    def expand_dim(var_):\n",
+    "        if np.ndim(var_) == 1:\n",
+    "            var_ = np.expand_dims(var_, 1)\n",
+    "        ones = np.ones(shape=(var_.shape[0], 1))\n",
+    "        exp_var_ = np.concatenate([ones, var_], axis=1)\n",
+    "        return exp_var_\n",
+    "\n",
     "    # the loss function\n",
     "    def loss(self, w, *args):\n",
     "        X, y = args\n",
-    "        return 0.5 * (self.q * (w - self.m)).dot(w - self.m) + np.sum([np.log(1 + np.exp(-y[j] * w.dot(X[j]))) for j in range(y.shape[0])])\n",
+    "        loss_ = 0.5 * (self.q * (w - self.m)).dot(w - self.m) + np.mean(\n",
+    "            [np.log(1 + np.exp(-y[j] * w.dot(X[j]) + 1e-7)) for j in range(y.shape[0])])\n",
+    "        return loss_\n",
     "    \n",
     "    # the gradient\n",
     "    def grad(self, w, *args):\n",
     "        X, y = args\n",
-    "        return self.q * (w - self.m) + (-1) * np.array([y[j] * X[j] / (1. + np.exp(y[j] * w.dot(X[j]))) for j in range(y.shape[0])]).sum(axis=0)\n",
+    "        grad_ = self.q * (w - self.m) + (-1) * np.array(\n",
+    "            [y[j] * X[j] / (1. + np.exp(y[j] * w.dot(X[j]) + 1e-7)) for j in range(y.shape[0])]).sum(axis=0)\n",
+    "        return grad_\n",
     "    \n",
     "    # method for sampling weights\n",
     "    def get_weights(self):\n",
-    "        return np.random.normal(self.m, self.alpha * (self.q)**(-1.0), size = self.n_dim)\n",
+    "        return np.random.normal(self.m, self.alpha * self.q ** (-1.0),\n",
+    "                                 size=(self.n_dim + 1 if self.intercept else self.n_dim))\n",
     "    \n",
     "    # fitting method\n",
     "    def fit(self, X, y):\n",
-    "        \n",
-    "        # step 1, find w\n",
-    "        self.w = minimize(self.loss, self.w, args=(X, y), jac=self.grad, method=\"L-BFGS-B\", options={'maxiter': 20, 'disp':True}).x\n",
+    "        if self.intercept:\n",
+    "            X = self.expand_dim(X)\n",
+    "        self.w = minimize(self.loss, self.w, args=(X, y), jac=self.grad, method=\"CG\",\n",
+    "                          options={'maxiter': 100, 'disp': False}).x\n",
     "        self.m = self.w\n",
-    "        \n",
-    "        # step 2, update q\n",
-    "        P = (1 + np.exp(-1*X.dot(self.m))) ** (-1)\n",
-    "        self.q = self.q + (P*(1-P)).dot(X ** 2)\n",
+    "        P = (1 + np.exp(-1 * X.dot(self.m))) ** (-1)\n",
+    "        self.q = self.q + (P * (1 - P)).dot(X ** 2)\n",
+    "        return\n",
     "    \n",
     "    # probability output method, using weights sample\n",
     "    def predict_proba(self, X, mode='sample'):\n",
-    "        \n",
-    "        # adding intercept to X\n",
-    "        #X = add_constant(X)\n",
-    "        \n",
-    "        # sampling weights after update\n",
+    "        if self.intercept:\n",
+    "            X = self.expand_dim(X)\n",
     "        self.w = self.get_weights()\n",
-    "        \n",
+    "\n",
     "        # using weight depending on mode\n",
     "        if mode == 'sample':\n",
-    "            w = self.w # weights are samples of posteriors\n",
+    "            w = self.w  # weights are samples of posteriors\n",
     "        elif mode == 'expected':\n",
-    "            w = self.m # weights are expected values of posteriors\n",
+    "            w = self.m  # weights are expected values of posteriors\n",
     "        else:\n",
     "            raise Exception('mode not recognized!')\n",
-    "        \n",
+    "\n",
     "        # calculating probabilities\n",
     "        proba = 1 / (1 + np.exp(-1 * X.dot(w)))\n",
-    "        return np.array([1-proba , proba]).T"
+    "        return np.array([1-proba , proba]).T\n",
+    "    "
    ]
   },
   {
@@ -13203,4 +13211,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 2
-}
+}
\ No newline at end of file
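
A minimal usage sketch of the patched class follows, for reference. It assumes the OnlineLogisticRegression definition from the diff above is already in scope, along with the notebook's imports (numpy as np, scipy.optimize.minimize); the toy data, seed, and hyperparameter values are illustrative assumptions, not part of the patch.

# Illustrative sketch only -- assumes the patched OnlineLogisticRegression
# class above is already defined in the session.
import numpy as np
from scipy.optimize import minimize  # used internally by fit()

np.random.seed(42)

# Toy data with labels in {-1, +1}, matching the y[j] * w.dot(X[j])
# convention used in loss() and grad().
X = np.random.normal(size=(100, 2))
y = np.where(X.dot(np.array([1.5, -0.5])) + 0.3 > 0, 1, -1)

# lambda_ sets the prior precision (the L2 strength), alpha scales the
# variance of the posterior weight samples, and intercept=True triggers
# expand_dim() so a bias column is prepended to X.
olr = OnlineLogisticRegression(lambda_=1.0, alpha=1.0, n_dim=2, intercept=True)
olr.fit(X, y)

# 'sample' draws weights from the posterior (the Thompson sampling path);
# 'expected' uses the posterior mean instead. Each call returns a
# (n_samples, 2) array of [P(y=-1), P(y=+1)] per row.
print(olr.predict_proba(X[:5], mode='sample'))
print(olr.predict_proba(X[:5], mode='expected'))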