Skip to content

Commit

Permalink
changes to class
Browse files Browse the repository at this point in the history
  • Loading branch information
avinashbarnwal committed Sep 24, 2019
1 parent 1dda626 commit a001cbf
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 102 deletions.
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# -*- coding: utf-8 -*-
from distutils.core import setup

setup(
name = 'stepwisereg',
packages = ['stepwisereg'],
version = '0.1.11',
version = '0.1.0',
description = 'Stepwise Regression in Python',
long_description = 'Forward Stepwise Regression in Python like R using AIC',
author = 'Avinash Barnwal',
Expand All @@ -12,8 +13,8 @@
maintainer='Avinash Barnwal',
maintainer_email='[email protected]',
install_requires=[
'numpy',
'pandas',
'numpy',
'pandas',
'sklearn',
'patsy',
'statsmodel',
Expand Down
98 changes: 10 additions & 88 deletions stepwisereg/example/lusc_lung_cancer.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,12 @@
import numpy as np
import sklearn
import warnings
import os
warnings.filterwarnings('ignore')
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import make_regression
from patsy import dmatrices
import statsmodels.formula.api as smf
import pandas as pd
import functools
import re
from stepwisereg import *


path = '/data_path'
os.chdir(path)

data = pd.read_csv("test_data.csv")
data = pd.read_csv("lung_cancer.csv")
###########Train Dataset and Test Dataset Creation########

msk = np.random.rand(len(data)) < 0.8
train = data[msk]
test = data[~msk]
test = data[~msk]

######### Independent variables are columns 2:102; column 106 holds the dependent variable ########
X_train = data.iloc[:,2:102]
Expand All @@ -37,7 +23,7 @@

columns_test = list(X_test.columns.values)
columns_changes_test = map(lambda x:x.replace("-", "_"),columns_test)
X_test.columns = columns_changes_test
X_test.columns = columns_changes_test
test = pd.concat([X_test,Y_test],axis=1)

##Creating the features concatenation
Expand All @@ -48,72 +34,8 @@
null = 'OS_MONTHS ~' + var1
full = 'OS_MONTHS ~' + features


# In[10]:

def reduce_concat(x, sep=""):
    """Join the string form of every item in *x*, separated by *sep*.

    Parameters:
    -----------
    x : iterable of objects; each item is converted with ``str``
    sep : string placed between consecutive items (default: empty string)

    Returns:
    --------
    A single string. An empty iterable yields ``""`` and a single item is
    returned as its ``str`` form.
    """
    # str.join is linear and handles the empty / single-item cases that a
    # pairwise functools.reduce concatenation does not.
    return sep.join(str(item) for item in x)

def _formula_terms(formula):
    """Return the stripped right-hand-side terms of *formula*, minus the first one."""
    rhs = formula.split('~')[1]
    # The first '+'-separated term is discarded, as in the original parsing
    # (presumably it is the intercept placeholder such as "1" -- confirm
    # against the formulas callers pass in).
    return [term.strip() for term in rhs.split('+')][1:]


def forward_selected(data,null_formula,full_formula,response,step,intercept):
    """Linear model designed by forward selection.

    Starting from the predictors of ``null_formula``, the candidate from
    ``full_formula`` that gives the lowest AIC is added one at a time. The
    search stops when no candidate lowers the AIC, when no candidates are
    left, or after ``step`` predictors have been added.

    Parameters:
    -----------
    data : pandas DataFrame with all possible predictors and response
    null_formula : string, formula of the starting model; the first term
        after ``~`` is ignored and the rest are always kept in the model
    full_formula : string, formula listing every candidate predictor; the
        first term after ``~`` is ignored
    response: string, name of response column in data
    step : maximum number of predictors to add
    intercept : when equal to 1, ``"-1"`` is appended to every formula
        (in patsy formula syntax this removes the intercept term)

    Returns:
    --------
    model: an "optimal" fitted statsmodels linear model
           selected by forward selection
           evaluated by aic
    """
    print(null_formula)
    print(full_formula)
    print(response)

    selected = _formula_terms(null_formula)
    remaining = {term for term in _formula_terms(full_formula)
                 if term not in selected}

    def build_formula(terms):
        formula = "{} ~ {}".format(response,' + '.join(terms))
        if intercept ==1:
            formula = formula + "-1"
        return formula

    # Infinity instead of a large finite sentinel: a real AIC can exceed any
    # fixed constant on big data sets.
    current_score = float('inf')
    steps_left = step
    while remaining and steps_left > 0:
        best_new_score, best_candidate = min(
            (smf.ols(build_formula(selected + [candidate]), data).fit().aic,
             candidate)
            for candidate in remaining
        )
        # Stop on "no strict improvement" (this also covers NaN scores);
        # continuing on an exact tie would loop forever because nothing
        # changes between iterations.
        if not best_new_score < current_score:
            break
        remaining.remove(best_candidate)
        selected.append(best_candidate)
        current_score = best_new_score
        steps_left -= 1

    model = smf.ols(build_formula(selected), data).fit()
    return model

model = forward_selected(train,null,full,'OS_MONTHS',50,0)

model_param = model.params
print(model.summary())

test_predict = model.predict(test)
model = stepwisereg(100,1)
model_fit = model.fit(train,null,full,'OS_MONTHS')
model_param = model_fit.params
test_predict = model_fit.predict(test)
print(model_fit.summary())
21 changes: 10 additions & 11 deletions stepwisereg/stepwisereg.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,14 @@

class stepwisereg:

def __init__(self):
pass
def __init__(self,step,fit_intercept):
self.step = step
self.fit_intercept = fit_intercept

def reduce_concat(self,x, sep=""):
    """Join the string form of every item in *x*, separated by *sep*.

    Parameters:
    -----------
    x : iterable of objects; each item is converted with ``str``
    sep : string placed between consecutive items (default: empty string)

    Returns:
    --------
    A single string. An empty iterable yields ``""`` and a single item is
    returned as its ``str`` form.
    """
    # str.join is linear and handles the empty / single-item cases that a
    # pairwise functools.reduce concatenation does not.
    return sep.join(str(item) for item in x)

def forward_selected(self,data,null_formula,full_formula,response,step,intercept):
def fit(self,data,null_formula,full_formula,response):

"""Linear model designed by forward selection.
Parameters:
Expand All @@ -46,20 +47,18 @@ def forward_selected(self,data,null_formula,full_formula,response,step,intercept
full_predic = full_predic_com[1:len(full_predic_com)]
indices = [i for i,id in enumerate(full_predic) if id not in null_predic]
domain = [full_predic[i] for i in indices]

start = set(null_predic)
remaining = set(domain)
selected = null_predic
current_score, best_new_score = 10000000, 10000000
score_bic = []
current_score, best_new_score = float('inf'), float('inf')
score_selected = []
variable_added = []
flag=0
step=2

while (remaining and current_score == best_new_score and step >0):
scores_with_candidates = []
for candidate in remaining:
formula = "{} ~ {}".format(response,' + '.join(selected + [candidate]))
if intercept ==1:
if self.fit_intercept == 0:
formula = formula + "-1"
score = smf.ols(formula, data).fit().aic
scores_with_candidates.append((score, candidate))
Expand All @@ -68,12 +67,12 @@ def forward_selected(self,data,null_formula,full_formula,response,step,intercept
if current_score > best_new_score:
remaining.remove(best_candidate)
selected.append(best_candidate)
score_bic.append(best_new_score)
score_selected.append(best_new_score)
variable_added.append(best_candidate)
current_score = best_new_score
step=step-1
formula = "{} ~ {}".format(response,' + '.join(selected))
if intercept ==1:
if self.fit_intercept == 0:
formula = formula + "-1"
model = smf.ols(formula, data).fit()
return model
Expand Down

0 comments on commit a001cbf

Please sign in to comment.