
Commit

added the gamma for extra smoothing.
shyambhu-mukherjee committed Jul 20, 2020
1 parent f411c35 commit 68fde43
Showing 2 changed files with 117 additions and 18 deletions.
100 changes: 99 additions & 1 deletion pygam/distributions.py
@@ -657,10 +657,108 @@ def sample(self, mu):
"""
return np.random.wald(mean=mu, scale=self.scale, size=None)

#class tDist(Distribution):
# """
#    non-standardized Student's t distribution.
#    X = mu + sigma * T
#    where T ~ standard_t distribution
# """
#
#    def __init__(self, df=None, sigma=None):
# """
# creates an instance of the tDist class
#
# Parameters
# ----------
#        df : int or None, default: None
#            degrees of freedom of the distribution
#        sigma : float or None, default: None
#            scale/standard deviation of the distribution
#
# Returns
# -------
# self
# """
#        super(tDist, self).__init__(name='tdist', scale=sigma)
# self.nu = df
# self.sigma = sigma
#
# def log_pdf(self, y, mu, weights=None):
# """
# computes the log of the pdf or pmf of the values under the current distribution
#
# Parameters
# ----------
# y : array-like of length n
# target values
# mu : array-like of length n
# expected values
# weights : array-like shape (n,) or None, default: None
# sample weights
# if None, defaults to array of ones
#
# Returns
# -------
# pdf/pmf : np.array of length n
# """
# if weights is None:
# weights = np.ones_like(mu)
#        return sp.stats.t.logpdf(y, df=self.nu, loc=mu, scale=self.sigma)
#
# @divide_weights
# def V(self, mu):
# """
#        variance function; it has to be defined explicitly for non-exponential families.
# """
#        return np.ones_like(mu) * (self.sigma ** 2) * self.nu / (self.nu - 2.0)
#
# @multiply_weights
# def deviance(self, y, mu, scaled=True):
# """
# model deviance
#
#        for a t-distribution, how do we calculate deviance?? (one likelihood-based option is sketched after this file's diff)
#
# Parameters
# ----------
# y : array-like of length n
# target values
# mu : array-like of length n
# expected values
# scaled : boolean, default: True
# whether to divide the deviance by the distribution scaled
#
# Returns
# -------
# deviances : np.array of length n
# """
# dev = np.nan
#
# return dev
#
# def sample(self, mu):
# """
# Return random samples from this non-standardized t-distribution.
#
# Samples are drawn independently from distributions
# with means given by the values in `mu` and with standard deviations
#        equal to the `sigma` attribute if it exists, otherwise 1.0.
#
# Parameters
# ----------
# mu : array-like of shape n_samples or shape (n_simulations, n_samples)
# expected values
#
# Returns
# -------
# random_samples : np.array of same shape as mu
# """
#
#        return np.random.standard_t(self.nu, size=np.shape(mu)) * self.sigma + mu


DISTRIBUTIONS = {'normal': NormalDist,
'poisson': PoissonDist,
'binomial': BinomialDist,
'gamma': GammaDist,
'inv_gauss': InvGaussDist
'inv_gauss': InvGaussDist,
# 't': tDist
}
35 changes: 18 additions & 17 deletions pygam/pygam.py
@@ -682,7 +682,7 @@ def _initial_estimate(self, y, modelmat):
# not sure if this is faster...
# return np.linalg.pinv(modelmat.T.dot(modelmat)).dot(modelmat.T.dot(y_))

def _pirls(self, X, Y, weights):
def _pirls(self, X, Y, weights, gamma=None):
"""
Performs stable PIRLS iterations to estimate GAM coefficients
@@ -783,7 +783,7 @@ def _pirls(self, X, Y, weights):

# estimate statistics even if not converged
self._estimate_model_statistics(Y, modelmat, inner=None, BW=WB.T, B=B,
weights=weights, U1=U1)
weights=weights, U1=U1, gamma=gamma)
if diff < self.tol:
return

@@ -883,7 +883,7 @@ def _on_loop_end(self, variables):
if hasattr(callback, 'on_loop_end'):
self.logs_[str(callback)].append(callback.on_loop_end(**variables))

def fit(self, X, y, weights=None):
def fit(self, X, y, weights=None, gamma=None):
"""Fit the generalized additive model.
Parameters
@@ -933,7 +933,7 @@ def fit(self, X, y, weights=None):
self.statistics_['m_features'] = X.shape[1]

# optimize
self._pirls(X, y, weights)
self._pirls(X, y, weights, gamma=gamma)
# if self._opt == 0:
# self._pirls(X, y, weights)
# if self._opt == 1:
@@ -1009,7 +1009,7 @@ def deviance_residuals(self, X, y, weights=None, scaled=False):
weights=weights,
scaled=scaled) ** 0.5

def _estimate_model_statistics(self, y, modelmat, inner=None, BW=None,
def _estimate_model_statistics(self, y, modelmat, gamma=None, inner=None, BW=None,
B=None, weights=None, U1=None):
"""
method to compute all of the model statistics
@@ -1020,7 +1020,7 @@ def _estimate_model_statistics(self, y, modelmat, inner=None, BW=None,
- edof: estimated degrees freedom
- scale: distribution scale, if applicable
- cov: coefficient covariances
- se: standarrd errors
- se: standard errors
- AIC: Akaike Information Criterion
- AICc: corrected Akaike Information Criterion
- pseudo_r2: dict of Pseudo R-squared metrics
@@ -1058,7 +1058,7 @@ def _estimate_model_statistics(self, y, modelmat, inner=None, BW=None,
self.statistics_['AIC'] = self._estimate_AIC(y=y, mu=mu, weights=weights)
self.statistics_['AICc'] = self._estimate_AICc(y=y, mu=mu, weights=weights)
self.statistics_['pseudo_r2'] = self._estimate_r2(y=y, mu=mu, weights=weights)
self.statistics_['GCV'], self.statistics_['UBRE'] = self._estimate_GCV_UBRE(modelmat=modelmat, y=y, weights=weights)
self.statistics_['GCV'], self.statistics_['UBRE'] = self._estimate_GCV_UBRE(modelmat=modelmat, y=y, weights=weights, gamma=gamma)
self.statistics_['loglikelihood'] = self._loglikelihood(y, mu, weights=weights)
self.statistics_['deviance'] = self.distribution.deviance(y=y, mu=mu, weights=weights).sum()
self.statistics_['p_values'] = self._estimate_p_values()
@@ -1153,7 +1153,7 @@ def _estimate_r2(self, X=None, y=None, mu=None, weights=None):

return r2

def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4,
def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=None,
add_scale=True, weights=None):
"""
Generalized Cross Validation and Un-Biased Risk Estimator.
@@ -1194,7 +1194,8 @@ def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4,
if gamma is None:
    gamma = 1.4
if gamma < 1:
    raise ValueError('gamma scaling should be greater than 1, '
                     'but found gamma = {}'.format(gamma))
if modelmat is None:
modelmat = self._modelmat(X)

@@ -1702,7 +1703,7 @@ def summary(self):

def gridsearch(self, X, y, weights=None, return_scores=False,
keep_best=True, objective='auto', progress=True,
**param_grids):
gamma=None, **param_grids):
"""
Performs a grid search over a space of parameters for a given
objective
@@ -1930,7 +1931,7 @@ def gridsearch(self, X, y, weights=None, return_scores=False,
if models:
coef = models[-1].coef_
gam.set_params(coef_=coef, force=True, verbose=False)
gam.fit(X, y, weights)
gam.fit(X, y, weights, gamma=gamma)

except ValueError as error:
msg = str(error) + '\non model with params:\n' + str(param_grid)
@@ -2148,7 +2149,7 @@ def _sample_coef(self, X, y, weights=None, n_draws=100, n_bootstraps=1,

return coef_draws

def _bootstrap_samples_of_smoothing(self, X, y, weights=None,
def _bootstrap_samples_of_smoothing(self, X, y, weights=None, gamma=None,
n_bootstraps=1, objective='auto'):
"""Sample the smoothing parameters using simulated response data.
@@ -2191,7 +2192,7 @@ def _bootstrap_samples_of_smoothing(self, X, y, weights=None,
gam = deepcopy(self)
gam.set_params(self.get_params())
gam.lam = lam
gam.fit(X, y, weights=weights)
gam.fit(X, y, weights=weights, gamma=gamma)

coef_bootstraps.append(gam.coef_)

@@ -2711,7 +2712,7 @@ def _exposure_to_weights(self, y, exposure=None, weights=None):

return y, weights

def fit(self, X, y, exposure=None, weights=None):
def fit(self, X, y, gamma=None, exposure=None, weights=None):
"""Fit the generalized additive model.
Parameters
@@ -2739,7 +2740,7 @@ def fit(self, X, y, exposure=None, weights=None):
Returns fitted GAM object
"""
y, weights = self._exposure_to_weights(y, exposure, weights)
return super(PoissonGAM, self).fit(X, y, weights)
return super(PoissonGAM, self).fit(X, y, weights, gamma=gamma)

def predict(self, X, exposure=None):
"""
@@ -2775,7 +2776,7 @@ def predict(self, X, exposure=None):

return self.predict_mu(X) * exposure

def gridsearch(self, X, y, exposure=None, weights=None,
def gridsearch(self, X, y, gamma=None, exposure=None, weights=None,
return_scores=False, keep_best=True, objective='auto',
**param_grids):
"""
@@ -2844,7 +2845,7 @@ def gridsearch(self, X, y, exposure=None, weights=None,
self, ie possibly the newly fitted model
"""
y, weights = self._exposure_to_weights(y, exposure, weights)
return super(PoissonGAM, self).gridsearch(X, y,
return super(PoissonGAM, self).gridsearch(X, y, gamma=gamma,
weights=weights,
return_scores=return_scores,
keep_best=keep_best,
