
Commit

added the gamma for extra smoothing.
shyambhu-mukherjee committed Jul 20, 2020
1 parent f411c35 commit 68fde43
Showing 2 changed files with 117 additions and 18 deletions.
100 changes: 99 additions & 1 deletion pygam/distributions.py
@@ -657,10 +657,108 @@ def sample(self, mu):
"""
return np.random.wald(mean=mu, scale=self.scale, size=None)

#class tDist(Distribution):
# """
#    non-standardized Student's t distribution.
#    X = mu + sigma * T
#    where T ~ standard_t distribution
# """
#
#    def __init__(self, df=None, sigma=None):
# """
# creates an instance of the tDist class
#
# Parameters
# ----------
#        df : int or None, default: None
#            degrees of freedom of the distribution
#        sigma : float or None, default: None
#            scale/standard deviation of the distribution
#
# Returns
# -------
# self
# """
#        super(tDist, self).__init__(name='tdist', scale=sigma)
# self.nu = df
# self.sigma = sigma
#
# def log_pdf(self, y, mu, weights=None):
# """
# computes the log of the pdf or pmf of the values under the current distribution
#
# Parameters
# ----------
# y : array-like of length n
# target values
# mu : array-like of length n
# expected values
# weights : array-like shape (n,) or None, default: None
# sample weights
# if None, defaults to array of ones
#
# Returns
# -------
# pdf/pmf : np.array of length n
# """
# if weights is None:
# weights = np.ones_like(mu)
#        return sp.stats.t.logpdf(y, df=self.nu, loc=mu, scale=self.sigma)
#
# @divide_weights
# def V(self, mu):
# """
#        variance function; it has to be defined explicitly for non-exponential families.
# """
#        return np.ones_like(mu) * (self.sigma ** 2) * self.nu / (self.nu - 2.0)
#
# @multiply_weights
# def deviance(self, y, mu, scaled=True):
# """
# model deviance
#
#        for a t-distribution, how do we calculate deviance?? (one likelihood-based option is sketched after this file's diff)
#
# Parameters
# ----------
# y : array-like of length n
# target values
# mu : array-like of length n
# expected values
# scaled : boolean, default: True
# whether to divide the deviance by the distribution scaled
#
# Returns
# -------
# deviances : np.array of length n
# """
# dev = np.nan
#
# return dev
#
# def sample(self, mu):
# """
# Return random samples from this non-standardized t-distribution.
#
# Samples are drawn independently from distributions
# with means given by the values in `mu` and with standard deviations
#        equal to the `sigma` attribute if it exists, otherwise 1.0.
#
# Parameters
# ----------
# mu : array-like of shape n_samples or shape (n_simulations, n_samples)
# expected values
#
# Returns
# -------
# random_samples : np.array of same shape as mu
# """
#
#        return np.random.standard_t(self.nu, size=np.shape(mu)) * self.sigma + mu


DISTRIBUTIONS = {'normal': NormalDist,
'poisson': PoissonDist,
'binomial': BinomialDist,
'gamma': GammaDist,
'inv_gauss': InvGaussDist
'inv_gauss': InvGaussDist,
# 't': tDist
}
35 changes: 18 additions & 17 deletions pygam/pygam.py
@@ -682,7 +682,7 @@ def _initial_estimate(self, y, modelmat):
# not sure if this is faster...
# return np.linalg.pinv(modelmat.T.dot(modelmat)).dot(modelmat.T.dot(y_))

def _pirls(self, X, Y, weights):
def _pirls(self, X, Y, weights, gamma=None):
"""
Performs stable PIRLS iterations to estimate GAM coefficients
@@ -783,7 +783,7 @@ def _pirls(self, X, Y, weights):

# estimate statistics even if not converged
self._estimate_model_statistics(Y, modelmat, inner=None, BW=WB.T, B=B,
weights=weights, U1=U1)
weights=weights, U1=U1, gamma=gamma)
if diff < self.tol:
return

@@ -883,7 +883,7 @@ def _on_loop_end(self, variables):
if hasattr(callback, 'on_loop_end'):
self.logs_[str(callback)].append(callback.on_loop_end(**variables))

def fit(self, X, y, weights=None):
def fit(self, X, y, weights=None, gamma=None):
"""Fit the generalized additive model.
Parameters
@@ -933,7 +933,7 @@ def fit(self, X, y, weights=None):
self.statistics_['m_features'] = X.shape[1]

# optimize
self._pirls(X, y, weights)
self._pirls(X, y, weights, gamma=gamma)
# if self._opt == 0:
# self._pirls(X, y, weights)
# if self._opt == 1:
@@ -1009,7 +1009,7 @@ def deviance_residuals(self, X, y, weights=None, scaled=False):
weights=weights,
scaled=scaled) ** 0.5

def _estimate_model_statistics(self, y, modelmat, inner=None, BW=None,
def _estimate_model_statistics(self, y, modelmat, gamma=None, inner=None, BW=None,
B=None, weights=None, U1=None):
"""
method to compute all of the model statistics
@@ -1020,7 +1020,7 @@ def _estimate_model_statistics(self, y, modelmat, inner=None, BW=None,
- edof: estimated degrees freedom
- scale: distribution scale, if applicable
- cov: coefficient covariances
- se: standarrd errors
- se: standard errors
- AIC: Akaike Information Criterion
- AICc: corrected Akaike Information Criterion
- pseudo_r2: dict of Pseudo R-squared metrics
@@ -1058,7 +1058,7 @@ def _estimate_model_statistics(self, y, modelmat, inner=None, BW=None,
self.statistics_['AIC'] = self._estimate_AIC(y=y, mu=mu, weights=weights)
self.statistics_['AICc'] = self._estimate_AICc(y=y, mu=mu, weights=weights)
self.statistics_['pseudo_r2'] = self._estimate_r2(y=y, mu=mu, weights=weights)
self.statistics_['GCV'], self.statistics_['UBRE'] = self._estimate_GCV_UBRE(modelmat=modelmat, y=y, weights=weights)
self.statistics_['GCV'], self.statistics_['UBRE'] = self._estimate_GCV_UBRE(modelmat=modelmat, y=y, weights=weights, gamma=gamma)
self.statistics_['loglikelihood'] = self._loglikelihood(y, mu, weights=weights)
self.statistics_['deviance'] = self.distribution.deviance(y=y, mu=mu, weights=weights).sum()
self.statistics_['p_values'] = self._estimate_p_values()
@@ -1153,7 +1153,7 @@ def _estimate_r2(self, X=None, y=None, mu=None, weights=None):

return r2

def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4,
def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=None,
add_scale=True, weights=None):
"""
Generalized Cross Validation and Un-Biased Risk Estimator.
@@ -1194,7 +1194,8 @@ def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4,
if gamma is None:
    gamma = 1.4
if gamma < 1:
    raise ValueError('gamma scaling should be greater than 1, '
                     'but found gamma = {}'.format(gamma))
if modelmat is None:
modelmat = self._modelmat(X)

@@ -1702,7 +1703,7 @@ def summary(self):

def gridsearch(self, X, y, weights=None, return_scores=False,
keep_best=True, objective='auto', progress=True,
**param_grids):
gamma=None, **param_grids):
"""
Performs a grid search over a space of parameters for a given
objective
@@ -1930,7 +1931,7 @@ def gridsearch(self, X, y, weights=None, return_scores=False,
if models:
coef = models[-1].coef_
gam.set_params(coef_=coef, force=True, verbose=False)
gam.fit(X, y, weights)
gam.fit(X, y, weights, gamma=gamma)

except ValueError as error:
msg = str(error) + '\non model with params:\n' + str(param_grid)
@@ -2148,7 +2149,7 @@ def _sample_coef(self, X, y, weights=None, n_draws=100, n_bootstraps=1,

return coef_draws

def _bootstrap_samples_of_smoothing(self, X, y, weights=None,
def _bootstrap_samples_of_smoothing(self, X, y, weights=None, gamma=None,
n_bootstraps=1, objective='auto'):
"""Sample the smoothing parameters using simulated response data.
@@ -2191,7 +2192,7 @@ def _bootstrap_samples_of_smoothing(self, X, y, weights=None,
gam = deepcopy(self)
gam.set_params(self.get_params())
gam.lam = lam
gam.fit(X, y, weights=weights)
gam.fit(X, y, weights=weights, gamma=gamma)

coef_bootstraps.append(gam.coef_)

@@ -2711,7 +2712,7 @@ def _exposure_to_weights(self, y, exposure=None, weights=None):

return y, weights

def fit(self, X, y, exposure=None, weights=None):
def fit(self, X, y, gamma=None, exposure=None, weights=None):
"""Fit the generalized additive model.
Parameters
@@ -2739,7 +2740,7 @@ def fit(self, X, y, exposure=None, weights=None):
Returns fitted GAM object
"""
y, weights = self._exposure_to_weights(y, exposure, weights)
return super(PoissonGAM, self).fit(X, y, weights)
return super(PoissonGAM, self).fit(X, y, weights, gamma=gamma)

def predict(self, X, exposure=None):
"""
@@ -2775,7 +2776,7 @@ def predict(self, X, exposure=None):

return self.predict_mu(X) * exposure

def gridsearch(self, X, y, exposure=None, weights=None,
def gridsearch(self, X, y, gamma=None, exposure=None, weights=None,
return_scores=False, keep_best=True, objective='auto',
**param_grids):
"""
@@ -2844,7 +2845,7 @@ def gridsearch(self, X, y, exposure=None, weights=None,
self, ie possibly the newly fitted model
"""
y, weights = self._exposure_to_weights(y, exposure, weights)
return super(PoissonGAM, self).gridsearch(X, y,
return super(PoissonGAM, self).gridsearch(X, y, gamma=gamma,
weights=weights,
return_scores=return_scores,
keep_best=keep_best,
