Skip to content

Commit

Permalink
Merge pull request #8 from jzluo/endometrial
Browse files Browse the repository at this point in the history
Endometrial dataset
  • Loading branch information
jzluo authored Jul 29, 2022
2 parents b480c14 + 6a075ec commit 6db5784
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 4 deletions.
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[flake8]
max-line-length = 88
extend-ignore = E203
per-file-ignores = firthlogist/__init__.py:F401
6 changes: 5 additions & 1 deletion firthlogist/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
from .firthlogist import FirthLogisticRegression, load_sex2 # noqa F401
from .firthlogist import ( # noqa F401
FirthLogisticRegression,
load_endometrial,
load_sex2,
)
80 changes: 80 additions & 0 deletions firthlogist/datasets/endometrial.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"NV","PI","EH","HG"
0,13,1.64,0
0,16,2.26,0
0,8,3.14,0
0,34,2.68,0
0,20,1.28,0
0,5,2.31,0
0,17,1.8,0
0,10,1.68,0
0,26,1.56,0
0,17,2.31,0
0,8,2.01,0
0,7,1.89,0
0,20,3.15,0
0,10,1.23,0
0,18,1.27,0
0,16,1.76,0
0,18,2,0
0,8,2.64,1
0,29,0.88,1
0,12,1.27,1
0,20,1.37,1
1,38,0.97,1
1,22,1.14,1
1,7,0.88,1
1,25,0.91,1
1,15,0.58,1
0,7,0.97,1
0,28,1.5,0
0,11,1.33,0
0,19,2.37,0
0,10,1.82,0
0,10,3.13,0
0,18,1.31,0
0,14,1.92,0
0,21,1.64,0
0,11,2.01,0
0,17,1.88,0
0,25,1.93,0
0,16,2.11,0
0,19,1.29,0
0,15,1.72,0
0,33,0.75,0
0,24,1.92,0
0,48,1.84,1
0,12,1.11,1
0,19,1.61,1
0,2,1.18,1
1,22,1.44,1
1,40,1.18,1
1,5,0.93,1
1,0,1.17,1
0,21,1.19,1
0,15,1.06,1
0,29,2.02,0
0,15,2.29,0
0,12,2.33,0
0,3,2.9,0
0,20,1.7,0
0,23,1.41,0
0,12,2.25,0
0,22,1.54,0
0,42,1.97,0
0,15,1.75,0
0,13,2.16,0
0,14,2.57,0
0,19,1.37,0
0,12,3.61,0
0,13,2.04,0
0,10,2.17,0
0,12,1.69,1
1,49,0.27,1
0,6,1.84,1
0,5,1.3,1
0,17,0.96,1
1,11,1.01,1
1,21,0.98,1
0,5,0.35,1
1,19,1.02,1
0,33,0.85,1
35 changes: 33 additions & 2 deletions firthlogist/firthlogist.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,10 @@ class FirthLogisticRegression(BaseEstimator, ClassifierMixin):
def __init__(
self,
max_iter=25,
max_halfstep=25,
max_halfstep=0,
max_stepsize=5,
pl_max_iter=100,
pl_max_halfstep=25,
pl_max_halfstep=0,
pl_max_stepsize=5,
tol=0.0001,
fit_intercept=True,
Expand Down Expand Up @@ -444,3 +444,34 @@ def load_sex2():
X = X[:, 1:]
feature_names = ["age", "oc", "vic", "vicl", "vis", "dia"]
return X, y, feature_names


def load_endometrial():
    """
    Load the endometrial cancer dataset analyzed in Heinze and Schemper (2002).

    The data was originally provided by Dr E. Asseryanis from the Vienna
    University Medical School. The packaged ``endometrial.csv`` is read with its
    header row skipped; the last column becomes the target and the remaining
    columns become the feature matrix.

    Returns
    -------
    X
        endometrial data as numpy array (every column except the last)
    y
        endometrial `HG` target column (the last column of the file)
    feature_names
        List of feature names

    References
    ----------
    Agresti, A (2015). Foundations of Linear and Generalized Linear Models.
    Wiley Series in Probability and Statistics.

    Heinze G, Schemper M (2002). A solution to the problem of separation in logistic
    regression. Statistics in Medicine 21: 2409-2419.
    """
    with open_text("firthlogist.datasets", "endometrial.csv") as csv_file:
        table = np.loadtxt(csv_file, delimiter=",", skiprows=1)
    # The target sits in the final column; the columns before it are predictors.
    predictors, target = table[:, :-1], table[:, -1]
    return predictors, target, ["NV", "PI", "EH"]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "firthlogist"
version = "0.3.0"
version = "0.3.1"
description = "Python implementation of Logistic Regression with Firth's bias reduction"
authors = ["Jon Luo <[email protected]>"]
repository = "https://github.com/jzluo/firthlogist"
Expand Down

0 comments on commit 6db5784

Please sign in to comment.